From 23c42a403a9cfdbad6004a556c927be7dd61a8ee Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Oct 2018 15:07:40 +0200 Subject: netfilter: ipset: Introduction of new commands and protocol version 7 Two new commands (IPSET_CMD_GET_BYNAME, IPSET_CMD_GET_BYINDEX) are introduced. The new commands make it possible to eliminate the getsockopt operation (in iptables set/SET match/target) and thus use only netlink communication between userspace and kernel for ipset. With the new protocol version, userspace can exactly know which functionality is supported by the running kernel. Both the kernel and userspace are fully backward compatible. Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 60236f694143..ea69ca21ff23 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -13,8 +13,9 @@ #include <linux/types.h> -/* The protocol version */ -#define IPSET_PROTOCOL 6 +/* The protocol versions */ +#define IPSET_PROTOCOL 7 +#define IPSET_PROTOCOL_MIN 6 /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -38,17 +39,19 @@ enum ipset_cmd { IPSET_CMD_TEST, /* 11: Test an element in a set */ IPSET_CMD_HEADER, /* 12: Get set header data only */ IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */ + IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */ IPSET_MSG_MAX, /* Netlink message commands */ /* Commands in userspace: */ - IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ - IPSET_CMD_HELP, /* 15: Get help */ - IPSET_CMD_VERSION, /* 16: Get program version */ - IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */ + 
IPSET_CMD_HELP, /* 17: Get help */ + IPSET_CMD_VERSION, /* 18: Get program version */ + IPSET_CMD_QUIT, /* 19: Quit from interactive mode */ IPSET_CMD_MAX, - IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */ }; /* Attributes at command level */ @@ -66,6 +69,7 @@ enum { IPSET_ATTR_LINENO, /* 9: Restore lineno */ IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + IPSET_ATTR_INDEX, /* 11: Kernel index of set */ __IPSET_ATTR_CMD_MAX, }; #define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) @@ -223,6 +227,7 @@ enum ipset_adt { /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t * and IPSET_INVALID_ID if you want to increase the max number of sets. + * Also, IPSET_ATTR_INDEX must be changed. */ typedef __u16 ip_set_id_t; -- cgit v1.2.3 From e20cf8d3f1f763ad28a9cb3b41305b8a8a42653e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:29 +0100 Subject: udp: implement GRO for plain UDP sockets. This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT"). When UDP_GRO is enabled, such socket is also eligible for GRO in the rx path: UDP segments directed to such socket are assembled into a larger GSO_UDP_L4 packet. The core UDP GRO support is enabled with setsockopt(UDP_GRO). Initial benchmark numbers: Before: udp rx: 1079 MB/s 769065 calls/s After: udp rx: 1466 MB/s 24877 calls/s This change introduces a side effect in respect to UDP tunnels: after a UDP tunnel creation, now the kernel performs a lookup per ingress UDP packet, while before such lookup happened only if the ingress packet carried a valid internal header csum. 
rfc v2 -> rfc v3: - fixed typos in macro name and comments - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1 - acquire socket lock in UDP_GRO setsockopt rfc v1 -> rfc v2: - use a new option to enable UDP GRO - use static keys to protect the UDP GRO socket lookup Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09502de447f5..30baccb6c9c4 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -33,6 +33,7 @@ struct udphdr { #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ +#define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -- cgit v1.2.3 From b4d3069783bccf0c965468da7db141d359d796fc Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:16 +0100 Subject: vxlan: Allow configuration of DF behaviour Allow users to set the IPv4 DF bit in outgoing packets, or to inherit its value from the IPv4 inner header. If the encapsulated protocol is IPv6 and DF is configured to be inherited, always set it. For IPv4, inheriting DF from the inner header was probably intended from the very beginning judging by the comment to vxlan_xmit(), but it wasn't actually implemented -- also because it would have done more harm than good, without handling for ICMP Fragmentation Needed messages. According to RFC 7348, "Path MTU discovery MAY be used". An expired RFC draft, draft-saum-nvo3-pmtud-over-vxlan-05, whose purpose was to describe PMTUD implementation, says that "is a MUST that Vxlan gateways [...] SHOULD set the DF-bit [...]", whatever that means. 
Given this background, the only sane option is probably to let the user decide, and keep the current behaviour as default. This only applies to non-lwt tunnels: if an external control plane is used, tunnel key will still control the DF flag. v2: - DF behaviour configuration only applies for non-lwt tunnels, move DF setting to if (!info) block in vxlan_xmit_one() (Stephen Hemminger) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1debfa42cba1..efc588949431 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -533,6 +533,7 @@ enum { IFLA_VXLAN_LABEL, IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, + IFLA_VXLAN_DF, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -542,6 +543,14 @@ struct ifla_vxlan_port_range { __be16 high; }; +enum ifla_vxlan_df { + VXLAN_DF_UNSET = 0, + VXLAN_DF_SET, + VXLAN_DF_INHERIT, + __VXLAN_DF_END, + VXLAN_DF_MAX = __VXLAN_DF_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, -- cgit v1.2.3 From a025fb5f49ad38cf749753b16fcd031d0d678f2b Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:19 +0100 Subject: geneve: Allow configuration of DF behaviour draft-ietf-nvo3-geneve-08 says: It is strongly RECOMMENDED that Path MTU Discovery ([RFC1191], [RFC1981]) be used by setting the DF bit in the IP header when Geneve packets are transmitted over IPv4 (this is the default with IPv6). Now that ICMP error handling is working for GENEVE, we can comply with this recommendation. Make this configurable, though, to avoid breaking existing setups. By default, DF won't be set. It can be set or inherited from inner IPv4 packets. If it's configured to be inherited and we are encapsulating IPv6, it will be set. 
This only applies to non-lwt tunnels: if an external control plane is used, tunnel key will still control the DF flag. v2: - DF behaviour configuration only applies for non-lwt tunnels, apply DF setting only if (!geneve->collect_md) in geneve_xmit_skb() (Stephen Hemminger) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index efc588949431..f42c069d81db 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -566,10 +566,19 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, + IFLA_GENEVE_DF, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) +enum ifla_geneve_df { + GENEVE_DF_UNSET = 0, + GENEVE_DF_SET, + GENEVE_DF_INHERIT, + __GENEVE_DF_END, + GENEVE_DF_MAX = __GENEVE_DF_END - 1, +}; + /* PPP section */ enum { IFLA_PPP_UNSPEC, -- cgit v1.2.3 From c8123ead13a5c92dc5fd15c0fdfe88eef41e6ac1 Mon Sep 17 00:00:00 2001 From: Nitin Hande Date: Sun, 28 Oct 2018 21:02:45 -0700 Subject: bpf: Extend the sk_lookup() helper to XDP hookpoint. This patch proposes to extend the sk_lookup() BPF API to the XDP hookpoint. The sk_lookup() helper supports a lookup on incoming packet to find the corresponding socket that will receive this packet. Current support for this BPF API is at the tc hookpoint. This patch will extend this API at XDP hookpoint. A XDP program can map the incoming packet to the 5-tuple parameter and invoke the API to find the corresponding socket structure. 
Signed-off-by: Nitin Hande Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 852dc17ab47a..47d606d744cc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2201,6 +2201,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description @@ -2233,6 +2235,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description -- cgit v1.2.3 From 9bb7e0f24e7e7d00daa1219b14539e2e602649b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 10 Sep 2018 13:29:12 +0200 Subject: cfg80211: add peer measurement with FTM initiator API Add a new "peer measurement" API, that can be used to measure certain things related to a peer. Right now, only implement FTM (flight time measurement) over it, but the idea is that it'll be extensible to also support measuring the necessary things to calculate e.g. angle-of-arrival for WiGig. The API is structured to have a generic list of peers and channels to measure with/on, and then for each of those a set of measurements (again, only FTM right now) to perform. Results are sent to the requesting socket, including a final complete message. Closing the controlling netlink socket will abort a running measurement. v3: - add a bit to report "final" for partial results - remove list keeping etc. 
and just unicast out the results to the requester (big code reduction ...) - also send complete message unicast, and as a result remove the multicast group - separate out struct cfg80211_pmsr_ftm_request_peer from struct cfg80211_pmsr_request_peer - document timeout == 0 if no timeout - disallow setting timeout nl80211 attribute to 0, must not include attribute for no timeout - make MAC address randomization optional - change num bursts exponent default to 0 (1 burst, rather than the old default of 15==don't care) v4: - clarify NL80211_ATTR_TIMEOUT documentation v5: - remove unnecessary nl80211 multicast/family changes - remove partial results bit/flag, final is sufficient - add max_bursts_exponent, max_ftms_per_burst to capability - rename "frames per burst" -> "FTMs per burst" v6: - rename cfg80211_pmsr_free_wdev() to cfg80211_pmsr_wdev_down() and call it in leave, so the device can't go down with any pending measurements v7: - wording fixes (Lior) - fix ftm.max_bursts_exponent to allow having the limit of 0 (Lior) v8: - copyright statements - minor coding style fixes - fix error path leak Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 418 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 418 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6d610bae30a9..e45b88925783 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1036,6 +1036,30 @@ * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute. * + * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s) + * with the given parameters, which are encapsulated in the nested + * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address + * randomization may be enabled and configured by specifying the + * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. 
+ * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. + * A u64 cookie for further %NL80211_ATTR_COOKIE use is returned in + * the netlink extended ack message. + * + * To cancel a measurement, close the socket that requested it. + * + * Measurement results are reported to the socket that requested the + * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they + * become available, so applications must ensure a large enough socket + * buffer size. + * + * Depending on driver support it may or may not be possible to start + * multiple concurrent measurements. + * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the + * result notification from the driver to the requesting socket. + * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that + * the measurement completed, using the measurement cookie + * (%NL80211_ATTR_COOKIE). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1250,6 +1274,10 @@ enum nl80211_commands { NL80211_CMD_GET_FTM_RESPONDER_STATS, + NL80211_CMD_PEER_MEASUREMENT_START, + NL80211_CMD_PEER_MEASUREMENT_RESULT, + NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2254,6 +2282,16 @@ enum nl80211_commands { * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder * statistics, see &enum nl80211_ftm_responder_stats. * + * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32), + * if the attribute is not given no timeout is requested. Note that 0 is an + * invalid value. + * + * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result) + * data, uses nested attributes specified in + * &enum nl80211_peer_measurement_attrs. + * This is also used for capability advertisement in the wiphy information, + * with the appropriate sub-attributes. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2699,6 +2737,10 @@ enum nl80211_attrs { NL80211_ATTR_FTM_RESPONDER_STATS, + NL80211_ATTR_TIMEOUT, + + NL80211_ATTR_PEER_MEASUREMENTS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5906,4 +5948,380 @@ enum nl80211_ftm_responder_stats { NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1 }; +/** + * enum nl80211_preamble - frame preamble types + * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble + * @NL80211_PREAMBLE_HT: HT preamble + * @NL80211_PREAMBLE_VHT: VHT preamble + * @NL80211_PREAMBLE_DMG: DMG preamble + */ +enum nl80211_preamble { + NL80211_PREAMBLE_LEGACY, + NL80211_PREAMBLE_HT, + NL80211_PREAMBLE_VHT, + NL80211_PREAMBLE_DMG, +}; + +/** + * enum nl80211_peer_measurement_type - peer measurement types + * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use + * these numbers also for attributes + * + * @NL80211_PMSR_TYPE_FTM: flight time measurement + * + * @NUM_NL80211_PMSR_TYPES: internal + * @NL80211_PMSR_TYPE_MAX: highest type number + */ +enum nl80211_peer_measurement_type { + NL80211_PMSR_TYPE_INVALID, + + NL80211_PMSR_TYPE_FTM, + + NUM_NL80211_PMSR_TYPES, + NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1 +}; + +/** + * enum nl80211_peer_measurement_status - peer measurement status + * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully + * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused + * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out + * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent + * reason may be available in the response data + */ +enum nl80211_peer_measurement_status { + NL80211_PMSR_STATUS_SUCCESS, + NL80211_PMSR_STATUS_REFUSED, + NL80211_PMSR_STATUS_TIMEOUT, + NL80211_PMSR_STATUS_FAILURE, +}; + +/** + * enum nl80211_peer_measurement_req 
- peer measurement request attributes + * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement + * type-specific request data inside. The attributes used are from the + * enums named nl80211_peer_measurement_<type>_req. + * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported + * (flag attribute) + * + * @NUM_NL80211_PMSR_REQ_ATTRS: internal + * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_req { + __NL80211_PMSR_REQ_ATTR_INVALID, + + NL80211_PMSR_REQ_ATTR_DATA, + NL80211_PMSR_REQ_ATTR_GET_AP_TSF, + + /* keep last */ + NUM_NL80211_PMSR_REQ_ATTRS, + NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_resp - peer measurement response attributes + * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement + * type-specific results inside. The attributes used are from the enums + * named nl80211_peer_measurement_<type>_resp. + * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status + * (using values from &enum nl80211_peer_measurement_status.) + * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the + * result was measured; this value is not expected to be accurate to + * more than 20ms. (u64, nanoseconds) + * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface + * doing the measurement is connected to when the result was measured. + * This shall be accurately reported if supported and requested + * (u64, usec) + * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially + * (*e.g. with FTM per-burst data) this flag will be cleared on all but + * the last result; if all results are combined it's set on the single + * result. 
+ * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore + * + * @NUM_NL80211_PMSR_RESP_ATTRS: internal + * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_resp { + __NL80211_PMSR_RESP_ATTR_INVALID, + + NL80211_PMSR_RESP_ATTR_DATA, + NL80211_PMSR_RESP_ATTR_STATUS, + NL80211_PMSR_RESP_ATTR_HOST_TIME, + NL80211_PMSR_RESP_ATTR_AP_TSF, + NL80211_PMSR_RESP_ATTR_FINAL, + NL80211_PMSR_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_RESP_ATTRS, + NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement + * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid + * + * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address + * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level + * attributes like %NL80211_ATTR_WIPHY_FREQ etc. + * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_req inside. + * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_resp inside. 
+ * + * @NUM_NL80211_PMSR_PEER_ATTRS: internal + * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_peer_attrs { + __NL80211_PMSR_PEER_ATTR_INVALID, + + NL80211_PMSR_PEER_ATTR_ADDR, + NL80211_PMSR_PEER_ATTR_CHAN, + NL80211_PMSR_PEER_ATTR_REQ, + NL80211_PMSR_PEER_ATTR_RESP, + + /* keep last */ + NUM_NL80211_PMSR_PEER_ATTRS, + NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1, +}; + +/** + * enum nl80211_peer_measurement_attrs - peer measurement attributes + * @__NL80211_PMSR_ATTR_INVALID: invalid + * + * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability + * advertisement only, indicates the maximum number of peers + * measurements can be done with in a single request + * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability + * indicating that the connected AP's TSF can be reported in + * measurement results + * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability + * indicating that MAC address randomization is supported. + * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device, + * this contains a nesting indexed by measurement type, and + * type-specific capabilities inside, which are from the enums + * named nl80211_peer_measurement_<type>_capa. + * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is + * meaningless, just a list of peers to measure with, with the + * sub-attributes taken from + * &enum nl80211_peer_measurement_peer_attrs. 
+ * + * @NUM_NL80211_PMSR_ATTR: internal + * @NL80211_PMSR_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_attrs { + __NL80211_PMSR_ATTR_INVALID, + + NL80211_PMSR_ATTR_MAX_PEERS, + NL80211_PMSR_ATTR_REPORT_AP_TSF, + NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR, + NL80211_PMSR_ATTR_TYPE_CAPA, + NL80211_PMSR_ATTR_PEERS, + + /* keep last */ + NUM_NL80211_PMSR_ATTR, + NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_capa - FTM capabilities + * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode + * is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP + * mode is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI + * data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic + * location data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits + * from &enum nl80211_preamble. + * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from + * &enum nl80211_chan_width indicating the supported channel + * bandwidths for FTM. Note that a higher channel bandwidth may be + * configured to allow for other measurements types with different + * bandwidth requirement in the same measurement. 
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating + * the maximum bursts exponent that can be used (if not present anything + * is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating + * the maximum FTMs per burst (if not present anything is valid) + * + * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_capa { + __NL80211_PMSR_FTM_CAPA_ATTR_INVALID, + + NL80211_PMSR_FTM_CAPA_ATTR_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC, + NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, + NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, + + /* keep last */ + NUM_NL80211_PMSR_FTM_CAPA_ATTR, + NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_req - FTM request attributes + * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see + * &enum nl80211_preamble), optional for DMG (u32) + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in + * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element" + * (u8, 0-15, optional with default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units + * of 100ms (u16, optional with default 0) + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016 + * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with + * default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames + * requested per burst + * (u8, 0-31, optional with default 0 i.e. 
"no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries + * (u8, default 3) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data + * (flag) + * + * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal + * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_req { + __NL80211_PMSR_FTM_REQ_ATTR_INVALID, + + NL80211_PMSR_FTM_REQ_ATTR_ASAP, + NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE, + NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD, + NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC, + + /* keep last */ + NUM_NL80211_PMSR_FTM_REQ_ATTR, + NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons + * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used + * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder + * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement + * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is + * on a different channel, so can't measure (if we didn't know, we'd + * try and get no response) + * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM + * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps + * received + * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry + * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME) + * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed + * by the peer and are no longer supported + */ +enum nl80211_peer_measurement_ftm_failure_reasons { + NL80211_PMSR_FTM_FAILURE_UNSPECIFIED, + 
NL80211_PMSR_FTM_FAILURE_NO_RESPONSE, + NL80211_PMSR_FTM_FAILURE_REJECTED, + NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL, + NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE, + NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP, + NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS, +}; + +/** + * enum nl80211_peer_measurement_ftm_resp - FTM response attributes + * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason + * (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported + * as separate results then it will be the burst index 0...(N-1) and + * the top level will indicate partial results (u32) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames + * transmitted (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames + * that were acknowledged (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the + * busy peer (u32, seconds) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent + * used by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by + * the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used + * by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the + * FTM action frame (optional, nested, using &enum nl80211_rate_info + * attributes) + * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM + * action frame (optional, nested, using &enum nl80211_rate_info attrs) + * 
@NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that + * standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds, + * optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note + * that standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer + * (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only + * + * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal + * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_resp { + __NL80211_PMSR_FTM_RESP_ATTR_INVALID, + + NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON, + NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES, + NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME, + NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_TX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG, + NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_LCI, + NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC, + 
NL80211_PMSR_FTM_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_FTM_RESP_ATTR, + NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1 +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From dbdaee7aa6e61f56aac61b71a7807e76f92cc895 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 15:48:53 -0400 Subject: {nl,mac}80211: report gate connectivity in station info Capture the current state of gate connectivity from the mesh formation field in mesh config whenever we receive a beacon, and report that via GET_STATION. This allows applications doing mesh peering in userspace to make peering decisions based on peers' current upstream connectivity. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e45b88925783..ff6005edf32f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3116,6 +3116,8 @@ enum nl80211_sta_bss_param { * with an FCS error (u32, from this station). This count may not include * some packets with an FCS error due to TA corruption. Hence this counter * might not be fully accurate. 
+ * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a + * mesh gate * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3158,6 +3160,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_ACK_SIGNAL_AVG, NL80211_STA_INFO_RX_MPDUS, NL80211_STA_INFO_FCS_ERROR_COUNT, + NL80211_STA_INFO_CONNECTED_TO_GATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 01d66fbd5b18ac9f01a6a2ae1278189d19208ad5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 17:36:34 -0400 Subject: {nl,mac}80211: add dot11MeshConnectedToMeshGate to meshconf When userspace is controlling mesh routing, it may have better knowledge about whether a mesh STA is connected to a mesh gate than the kernel mpath table. Add dot11MeshConnectedToMeshGate to the mesh config so that such applications can explicitly signal that a mesh STA is connected to a gate, which will then be advertised in the beacon. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ff6005edf32f..51bd85b7d839 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3117,7 +3117,7 @@ enum nl80211_sta_bss_param { * some packets with an FCS error due to TA corruption. Hence this counter * might not be fully accurate. * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a - * mesh gate + * mesh gate (u8, 0 or 1) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3940,6 +3940,11 @@ enum nl80211_mesh_power_mode { * remove it from the STA's list of peers. You may set this to 0 to disable * the removal of the STA. Default is 30 minutes. 
* + * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA + * will advertise that it is connected to a gate in the mesh formation + * field. If left unset then the mesh formation field will only + * advertise such if there is an active root mesh path. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -3972,6 +3977,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_POWER_MODE, NL80211_MESHCONF_AWAKE_WINDOW, NL80211_MESHCONF_PLINK_TIMEOUT, + NL80211_MESHCONF_CONNECTED_TO_GATE, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 361800876f80da3915c46e388fc682532228b2c3 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Nov 2018 11:14:44 +0100 Subject: ptp: add PTP_SYS_OFFSET_EXTENDED ioctl The PTP_SYS_OFFSET ioctl, which can be used to measure the offset between a PHC and the system clock, includes the total time that the driver needs to read the PHC timestamp. This typically involves reading of multiple PCI registers (sometimes in multiple iterations) and the register that contains the lowest bits of the timestamp is not read in the middle between the two readings of the system clock. This asymmetry causes the measured offset to have a significant error. Introduce a new ioctl, driver function, and helper functions, which allow the reading of the lowest register to be isolated from the other readings in order to reduce the asymmetry. The ioctl returns three timestamps for each measurement: - system time right before reading the lowest bits of the PHC timestamp - PHC time - system time immediately after reading the lowest bits of the PHC timestamp Cc: Richard Cochran Cc: Jacob Keller Cc: Marcelo Tosatti Signed-off-by: Miroslav Lichvar Signed-off-by: David S. 
Miller --- include/uapi/linux/ptp_clock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 3039bf6a742e..d73d83950265 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -84,6 +84,16 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +struct ptp_sys_offset_extended { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of [system, phc, system] time stamps. The kernel will provide + * 3*n_samples time stamps. + */ + struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; +}; + struct ptp_sys_offset_precise { struct ptp_clock_time device; struct ptp_clock_time sys_realtime; @@ -136,6 +146,8 @@ struct ptp_pin_desc { #define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) #define PTP_SYS_OFFSET_PRECISE \ _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) +#define PTP_SYS_OFFSET_EXTENDED \ + _IOW(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ -- cgit v1.2.3 From 48872c11b77271ef9b070bdc50afe6655c4eb9aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Nov 2018 09:11:31 -0800 Subject: net_sched: sch_fq: add dctcp-like marking Similar to 80ba92fa1a92 ("codel: add ce_threshold attribute") After EDT adoption, it became easier to implement DCTCP-like CE marking. In many cases, queues are not building in the network fabric but on the hosts themselves. If packets leaving fq missed their Earliest Departure Time by XXX usec, we mark them with ECN CE. This gives a feedback (after one RTT) to the sender to slow down and find better operating mode. Example : tc qd replace dev eth0 root fq ce_threshold 2.5ms Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 89ee47c2f17d..ee017bc057a3 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -864,6 +864,8 @@ enum { TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + __TCA_FQ_MAX }; @@ -882,6 +884,7 @@ struct tc_fq_qd_stats { __u32 inactive_flows; __u32 throttled_flows; __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ }; /* Heavy-Hitter Filter */ -- cgit v1.2.3 From 9b076f1c0f4869b838a1b7aa0edb5664d47ec8aa Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:07:14 +1100 Subject: fanotify: introduce new event mask FAN_OPEN_EXEC A new event mask FAN_OPEN_EXEC has been defined so that users have the ability to receive events specifically when a file has been opened with the intent to be executed. Events of FAN_OPEN_EXEC type will be generated when a file has been opened using either execve(), execveat() or uselib() system calls. The feature is implemented within fsnotify_open() by generating the FAN_OPEN_EXEC event type if __FMODE_EXEC is set within file->f_flags. 
Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b86740d1c50a..d9664fbc905b 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -10,6 +10,7 @@ #define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ #define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -- cgit v1.2.3 From 66917a3130f218dcef9eeab4fd11a71cd00cd7c9 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:12:44 +1100 Subject: fanotify: introduce new event mask FAN_OPEN_EXEC_PERM A new event mask FAN_OPEN_EXEC_PERM has been defined. This allows users to receive events and grant access to files that are intending to be opened for execution. Events of FAN_OPEN_EXEC_PERM type will be generated when a file has been opened by using either execve(), execveat() or uselib() system calls. This acts in the same manner as previous permission event mask, meaning that an access response is required from the user application in order to permit any further operations on the file. 
Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d9664fbc905b..909c98fcace2 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -16,6 +16,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ #define FAN_ONDIR 0x40000000 /* event occurred against dir */ -- cgit v1.2.3 From 5c72299fba9df407c2f2994e194edebf878996ee Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Mon, 12 Nov 2018 16:15:55 -0800 Subject: net: sched: cls_flower: Classify packets using port ranges Added support in tc flower for filtering based on port ranges. Example: 1. Match on a port range: ------------------------- $ tc filter add dev enp4s0 protocol ip parent ffff:\ prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\ action drop $ tc -s filter show dev enp4s0 parent ffff: filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 ip_proto tcp dst_port range 20-30 skip_hw not_in_hw action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 85 sec used 3 sec Action statistics: Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) backlog 0b 0p requeues 0 2. 
Match on IP address and port range: -------------------------------------- $ tc filter add dev enp4s0 protocol ip parent ffff:\ prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\ skip_hw action drop $ tc -s filter show dev enp4s0 parent ffff: filter protocol ip pref 1 flower chain 0 handle 0x2 eth_type ipv4 ip_proto tcp dst_ip 192.168.1.1 dst_port range 100-200 skip_hw not_in_hw action order 1: gact action drop random type none pass val 0 index 2 ref 1 bind 1 installed 58 sec used 2 sec Action statistics: Sent 920 bytes 20 pkt (dropped 20, overlimits 0 requeues 0) backlog 0b 0p requeues 0 v4: 1. Added condition before setting port key. 2. Organized setting and dumping port range keys into functions and added validation of input range. v3: 1. Moved new fields in UAPI enum to the end of enum. 2. Removed couple of empty lines. v2: Addressed Jiri's comments: 1. Added separate functions for dst and src comparisons. 2. Removed endpoint enum. 3. Added new bit TCA_FLOWER_FLAGS_RANGE to decide normal/range lookup. 4. Cleaned up fl_lookup function. Signed-off-by: Amritha Nambiar Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 401d0c1e612d..95d0db2a8350 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -485,6 +485,11 @@ enum { TCA_FLOWER_IN_HW_COUNT, + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */ + __TCA_FLOWER_MAX, }; @@ -518,6 +523,8 @@ enum { TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; +#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */ + /* Match-all classifier */ enum { -- cgit v1.2.3 From dfdda82e3b84c13601be09f8351ec4f15a4fbe03 Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Wed, 7 Nov 2018 00:00:02 +0300 Subject: crypto: streebog - register Streebog in hash info for IMA Register Streebog hash function in Hash Info arrays to let IMA use it for its purposes. Cc: linux-integrity@vger.kernel.org Signed-off-by: Vitaly Chikunov Reviewed-by: Mimi Zohar Signed-off-by: Herbert Xu --- include/uapi/linux/hash_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h index eea5d02c58de..74a8609fcb4d 100644 --- a/include/uapi/linux/hash_info.h +++ b/include/uapi/linux/hash_info.h @@ -33,6 +33,8 @@ enum hash_algo { HASH_ALGO_TGR_160, HASH_ALGO_TGR_192, HASH_ALGO_SM3_256, + HASH_ALGO_STREEBOG_256, + HASH_ALGO_STREEBOG_512, HASH_ALGO__LAST }; -- cgit v1.2.3 From 80e22e961dfd15530215f6f6dcd94cd8f65ba1ea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Nov 2018 22:23:49 -0800 Subject: net: sched: gred: provide a better structured dump and expose stats Currently all GRED's virtual queue data is dumped in a single array in a single attribute. This makes it pretty much impossible to add new fields. 
In order to expose more detailed stats add a new set of attributes. We can now expose the 64 bit value of bytesin and all the mark stats which were not part of the original design. Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ee017bc057a3..c8f717346b60 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -291,11 +291,37 @@ enum { TCA_GRED_DPS, TCA_GRED_MAX_P, TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + struct tc_gred_qopt { __u32 limit; /* HARD maximal queue length (bytes) */ __u32 qth_min; /* Min average length threshold (bytes) */ -- cgit v1.2.3 From 72111015024f4eddb5aac400ddbe38a4f8f0279a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Nov 2018 22:23:51 -0800 Subject: net: sched: gred: allow manipulating per-DP RED flags Allow users to set and dump RED flags (ECN enabled and harddrop) on per-virtual queue basis. 
Validation of attributes is split from changes to make sure we won't have to undo previous operations when we find out configuration is invalid. The objective is to allow changing per-Qdisc parameters without overwriting the per-vq configured flags. Old user space will not pass the TCA_GRED_VQ_FLAGS attribute and per-Qdisc flags will always get propagated to the virtual queues. New user space which wants to make use of per-vq flags should set per-Qdisc flags to 0 and then configure per-vq flags as it sees fit. Once per-vq flags are set per-Qdisc flags can't be changed to non-zero. Vice versa - if the per-Qdisc flags are non-zero the TCA_GRED_VQ_FLAGS attribute has to either be omitted or set to the same value as per-Qdisc flags. Update per-Qdisc parameters: per-Qdisc | per-VQ | result 0 | 0 | all vq flags updated 0 | non-0 | error (vq flags in use) non-0 | 0 | -- impossible -- non-0 | non-0 | all vq flags updated Update per-VQ state (flags parameter not specified): no change to flags Update per-VQ state (flags parameter set): per-Qdisc | per-VQ | result 0 | any | per-vq flags updated non-0 | 0 | -- impossible -- non-0 | non-0 | error (per-Qdisc flags in use) Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index c8f717346b60..0d18b1d1fbbc 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -317,6 +317,7 @@ enum { TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ TCA_GRED_VQ_STAT_PDROP, /* u32 */ TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ __TCA_GRED_VQ_MAX }; -- cgit v1.2.3 From 54e8cb786130949a4d37792383cb528176771e5d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Nov 2018 15:26:51 -0800 Subject: uapi/ethtool: fix spelling errors Trivial spelling errors found by codespell. 
Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c8f8e2455bf3..17be76aeb468 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -882,7 +882,7 @@ struct ethtool_rx_flow_spec { __u32 location; }; -/* How rings are layed out when accessing virtual functions or +/* How rings are laid out when accessing virtual functions or * offloaded queues is device specific. To allow users to do flow * steering and specify these queues the ring cookie is partitioned * into a 32bit queue index with an 8 bit virtual function id. @@ -891,7 +891,7 @@ struct ethtool_rx_flow_spec { * devices start supporting PCIe w/ARI. However at the moment I * do not know of any devices that support this so I do not reserve * space for this at this time. If a future patch consumes the next - * byte it should be aware of this possiblity. + * byte it should be aware of this possibility. */ #define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL #define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL -- cgit v1.2.3 From e8bd8fca6773ef49390269bd467bf940a0841ccf Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 15 Nov 2018 16:44:12 -0800 Subject: tcp: add SRTT to SCM_TIMESTAMPING_OPT_STATS Add TCP_NLA_SRTT to SCM_TIMESTAMPING_OPT_STATS that reports the smoothed round trip time in microseconds (tcp_sock.srtt_us >> 3). Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index e02d31986ff9..8bb6cc5f3235 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -266,6 +266,7 @@ enum { TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ TCP_NLA_REORD_SEEN, /* reordering events seen */ + TCP_NLA_SRTT, /* smoothed RTT in usecs */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 8d951a75d022d94a05f5fa74217670a981e8302d Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Fri, 16 Nov 2018 15:51:59 +1100 Subject: net/ncsi: Configure multi-package, multi-channel modes with failover This patch extends the ncsi-netlink interface with two new commands and three new attributes to configure multiple packages and/or channels at once, and configure specific failover modes. NCSI_CMD_SET_PACKAGE_MASK and NCSI_CMD_SET_CHANNEL_MASK set a whitelist of packages or channels allowed to be configured with the NCSI_ATTR_PACKAGE_MASK and NCSI_ATTR_CHANNEL_MASK attributes respectively. If one of these whitelists is set only packages or channels matching the whitelist are considered for the channel queue in ncsi_choose_active_channel(). These commands may also use the NCSI_ATTR_MULTI_FLAG to signal that multiple packages or channels may be configured simultaneously. NCSI hardware arbitration (HWA) must be available in order to enable multi-package mode. Multi-channel mode is always available. If the NCSI_ATTR_CHANNEL_ID attribute is present in the NCSI_CMD_SET_CHANNEL_MASK command then it sets the preferred channel as with the NCSI_CMD_SET_INTERFACE command. The combination of preferred channel and channel whitelist defines a primary channel and the allowed failover channels.
If the NCSI_ATTR_MULTI_FLAG attribute is also present then the preferred channel is configured for Tx/Rx and the other channels are enabled only for Rx. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/uapi/linux/ncsi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h index 0a26a5576645..a3f87c54fdb3 100644 --- a/include/uapi/linux/ncsi.h +++ b/include/uapi/linux/ncsi.h @@ -26,6 +26,12 @@ * @NCSI_CMD_SEND_CMD: send NC-SI command to network card. * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID * and NCSI_ATTR_CHANNEL_ID. + * @NCSI_CMD_SET_PACKAGE_MASK: set a whitelist of allowed packages. + * Requires NCSI_ATTR_IFINDEX and NCSI_ATTR_PACKAGE_MASK. + * @NCSI_CMD_SET_CHANNEL_MASK: set a whitelist of allowed channels. + * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID, and + * NCSI_ATTR_CHANNEL_MASK. If NCSI_ATTR_CHANNEL_ID is present it sets + * the primary channel. * @NCSI_CMD_MAX: highest command number */ enum ncsi_nl_commands { @@ -34,6 +40,8 @@ enum ncsi_nl_commands { NCSI_CMD_SET_INTERFACE, NCSI_CMD_CLEAR_INTERFACE, NCSI_CMD_SEND_CMD, + NCSI_CMD_SET_PACKAGE_MASK, + NCSI_CMD_SET_CHANNEL_MASK, __NCSI_CMD_AFTER_LAST, NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1 @@ -48,6 +56,10 @@ enum ncsi_nl_commands { * @NCSI_ATTR_PACKAGE_ID: package ID * @NCSI_ATTR_CHANNEL_ID: channel ID * @NCSI_ATTR_DATA: command payload + * @NCSI_ATTR_MULTI_FLAG: flag to signal that multi-mode should be enabled with + * NCSI_CMD_SET_PACKAGE_MASK or NCSI_CMD_SET_CHANNEL_MASK. + * @NCSI_ATTR_PACKAGE_MASK: 32-bit mask of allowed packages. + * @NCSI_ATTR_CHANNEL_MASK: 32-bit mask of allowed channels. 
* @NCSI_ATTR_MAX: highest attribute number */ enum ncsi_nl_attrs { @@ -57,6 +69,9 @@ enum ncsi_nl_attrs { NCSI_ATTR_PACKAGE_ID, NCSI_ATTR_CHANNEL_ID, NCSI_ATTR_DATA, + NCSI_ATTR_MULTI_FLAG, + NCSI_ATTR_PACKAGE_MASK, + NCSI_ATTR_CHANNEL_MASK, __NCSI_ATTR_AFTER_LAST, NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1 -- cgit v1.2.3 From 2cc0eeb67636e0339ad7b6cdfa305f63983642af Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 16:08:51 +0800 Subject: sctp: define subscribe in sctp_sock as __u16 The member subscribe in sctp_sock is used to indicate to which of the events it is subscribed, more like a group of flags. So it's better to be defined as __u16 (2 bytes), instead of struct sctp_event_subscribe (13 bytes). Note that sctp_event_subscribe is an UAPI struct, used on sockopt calls, and thus it will not be removed. This patch only changes the internal storage of the flags. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c81feb373d3e..66afa5b4ab6b 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -632,7 +632,9 @@ union sctp_notification { */ enum sctp_sn_type { - SCTP_SN_TYPE_BASE = (1<<15), + SCTP_SN_TYPE_BASE = (1<<15), + SCTP_DATA_IO_EVENT = SCTP_SN_TYPE_BASE, +#define SCTP_DATA_IO_EVENT SCTP_DATA_IO_EVENT SCTP_ASSOC_CHANGE, #define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE SCTP_PEER_ADDR_CHANGE, @@ -657,6 +659,8 @@ enum sctp_sn_type { #define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT SCTP_STREAM_CHANGE_EVENT, #define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT + SCTP_SN_TYPE_MAX = SCTP_STREAM_CHANGE_EVENT, +#define SCTP_SN_TYPE_MAX SCTP_SN_TYPE_MAX }; /* Notification error codes used to fill up the error fields in some -- cgit v1.2.3 From 480ba9c18a27ff77b02a2012e50dfd3e20ee9f7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18
Nov 2018 16:08:54 +0800 Subject: sctp: add sockopt SCTP_EVENT This patch adds sockopt SCTP_EVENT described in rfc6525#section-6.2. With this sockopt users can subscribe to an event from a specified asoc. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 66afa5b4ab6b..d584073532b8 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -129,6 +129,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_STREAM_SCHEDULER_VALUE 124 #define SCTP_INTERLEAVING_SUPPORTED 125 #define SCTP_SENDMSG_CONNECT 126 +#define SCTP_EVENT 127 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1154,6 +1155,12 @@ struct sctp_add_streams { uint16_t sas_outstrms; }; +struct sctp_event { + sctp_assoc_t se_assoc_id; + uint16_t se_type; + uint8_t se_on; +}; + /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, -- cgit v1.2.3 From 96b3b6c9091d23289721350e32c63cc8749686be Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:08 +0000 Subject: bpf: allow zero-initializing hash map seed Add a new flag BPF_F_ZERO_SEED, which forces a hash map to initialize the seed to zero. This is useful when doing performance analysis both on individual BPF programs, as well as the kernel's hash table implementation. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 47d606d744cc..8c01b89a4cb4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -269,6 +269,9 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. 
*/ +#define BPF_F_ZERO_SEED (1U << 6) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3 From 2f1833607aed6a9c1e1729bf0e2588c341ceb409 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:09 +0000 Subject: bpf: move BPF_F_QUERY_EFFECTIVE after map flags BPF_F_QUERY_EFFECTIVE is in the middle of the flags valid for BPF_MAP_CREATE. Move it to its own section to reduce confusion. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8c01b89a4cb4..05d95290b848 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -257,9 +257,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -272,6 +269,9 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3 From 23464f8c3407b83106463999b64fe10dc66ff6a3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:33 +0900 Subject: aio: Comment use of IOCB_FLAG_IOPRIO aio flag Comment the use of the IOCB_FLAG_IOPRIO aio flag similarly to the IOCB_FLAG_RESFD flag. 
Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/uapi/linux/aio_abi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index ce43d340f010..8387e0af0f76 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -50,6 +50,8 @@ enum { * * IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb" * is valid. + * IOCB_FLAG_IOPRIO - Set if the "aio_reqprio" member of the "struct iocb" + * is valid. */ #define IOCB_FLAG_RESFD (1 << 0) #define IOCB_FLAG_IOPRIO (1 << 1) -- cgit v1.2.3 From 177bbc67812d96dfec517ef293017ca614a0955a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 4 Oct 2018 16:30:04 -0400 Subject: media: v4l2-common.h: put backwards compat defines under #ifndef __KERNEL__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ensures that they won't be used in kernel code. Signed-off-by: Hans Verkuil Reviewed-by: Niklas Söderlund Acked-by: Sakari Ailus Tested-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-common.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-common.h b/include/uapi/linux/v4l2-common.h index 4f7b892377cd..7d21c1634b4d 100644 --- a/include/uapi/linux/v4l2-common.h +++ b/include/uapi/linux/v4l2-common.h @@ -79,24 +79,11 @@ /* Current composing area plus all padding pixels */ #define V4L2_SEL_TGT_COMPOSE_PADDED 0x0103 -/* Backward compatibility target definitions --- to be removed. 
*/ -#define V4L2_SEL_TGT_CROP_ACTIVE V4L2_SEL_TGT_CROP -#define V4L2_SEL_TGT_COMPOSE_ACTIVE V4L2_SEL_TGT_COMPOSE -#define V4L2_SUBDEV_SEL_TGT_CROP_ACTUAL V4L2_SEL_TGT_CROP -#define V4L2_SUBDEV_SEL_TGT_COMPOSE_ACTUAL V4L2_SEL_TGT_COMPOSE -#define V4L2_SUBDEV_SEL_TGT_CROP_BOUNDS V4L2_SEL_TGT_CROP_BOUNDS -#define V4L2_SUBDEV_SEL_TGT_COMPOSE_BOUNDS V4L2_SEL_TGT_COMPOSE_BOUNDS - /* Selection flags */ #define V4L2_SEL_FLAG_GE (1 << 0) #define V4L2_SEL_FLAG_LE (1 << 1) #define V4L2_SEL_FLAG_KEEP_CONFIG (1 << 2) -/* Backward compatibility flag definitions --- to be removed. */ -#define V4L2_SUBDEV_SEL_FLAG_SIZE_GE V4L2_SEL_FLAG_GE -#define V4L2_SUBDEV_SEL_FLAG_SIZE_LE V4L2_SEL_FLAG_LE -#define V4L2_SUBDEV_SEL_FLAG_KEEP_CONFIG V4L2_SEL_FLAG_KEEP_CONFIG - struct v4l2_edid { __u32 pad; __u32 start_block; @@ -105,4 +92,19 @@ struct v4l2_edid { __u8 *edid; }; +#ifndef __KERNEL__ +/* Backward compatibility target definitions --- to be removed. */ +#define V4L2_SEL_TGT_CROP_ACTIVE V4L2_SEL_TGT_CROP +#define V4L2_SEL_TGT_COMPOSE_ACTIVE V4L2_SEL_TGT_COMPOSE +#define V4L2_SUBDEV_SEL_TGT_CROP_ACTUAL V4L2_SEL_TGT_CROP +#define V4L2_SUBDEV_SEL_TGT_COMPOSE_ACTUAL V4L2_SEL_TGT_COMPOSE +#define V4L2_SUBDEV_SEL_TGT_CROP_BOUNDS V4L2_SEL_TGT_CROP_BOUNDS +#define V4L2_SUBDEV_SEL_TGT_COMPOSE_BOUNDS V4L2_SEL_TGT_COMPOSE_BOUNDS + +/* Backward compatibility flag definitions --- to be removed. */ +#define V4L2_SUBDEV_SEL_FLAG_SIZE_GE V4L2_SEL_FLAG_GE +#define V4L2_SUBDEV_SEL_FLAG_SIZE_LE V4L2_SEL_FLAG_LE +#define V4L2_SUBDEV_SEL_FLAG_KEEP_CONFIG V4L2_SEL_FLAG_KEEP_CONFIG +#endif + #endif /* __V4L2_COMMON__ */ -- cgit v1.2.3 From 2667a2626f4da370409c2830552f6e8c8b8c41e2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Nov 2018 15:29:08 -0800 Subject: bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO to support the function debug info. BTF_KIND_FUNC_PROTO must not have a name (i.e. 
!t->name_off) and it is followed by >= 0 'struct btf_param' objects to describe the function arguments. The BTF_KIND_FUNC must have a valid name and it must refer back to a BTF_KIND_FUNC_PROTO. The above is the conclusion after the discussion between Edward Cree, Alexei, Daniel, Yonghong and Martin. By combining BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO, a complete function signature can be obtained. It will be used in the later patches to learn the function signature of a running bpf program. Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/btf.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 972265f32871..14f66948fc95 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -40,7 +40,8 @@ struct btf_type { /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -64,8 +65,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -110,4 +113,13 @@ struct btf_member { __u32 offset; /* offset in bits */ }; +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type").
+ */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From 838e96904ff3fc6c30e5ebbc611474669856e3c0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:11 -0800 Subject: bpf: Introduce bpf_func_info This patch added interface to load a program with the following additional information: . prog_btf_fd . func_info, func_info_rec_size and func_info_cnt where func_info will provide function range and type_id corresponding to each function. The func_info_rec_size is introduced in the UAPI to specify struct bpf_func_info size passed from user space. This intends to make bpf_func_info structure growable in the future. If the kernel gets a different bpf_func_info size from userspace, it will try to handle user request with part of bpf_func_info it can understand. In this patch, kernel can understand struct bpf_func_info { __u32 insn_offset; __u32 type_id; }; If user passed a bpf func_info record size of 16 bytes, the kernel can still handle part of records with the above definition. If verifier agrees with function range provided by the user, the bpf_prog ksym for each function will use the func name provided in the type_id, which is supposed to provide better encoding as it is not limited by 16 bytes program name limitation and this is better for bpf program which contains multiple subprograms. The bpf_prog_info interface is also extended to return btf_id, func_info, func_info_rec_size and func_info_cnt to userspace, so userspace can print out the function prototype for each xlated function. The insn_offset in the returned func_info corresponds to the insn offset for xlated functions. With other jit related fields in bpf_prog_info, userspace can also print out function prototypes for each jited function. 
Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 05d95290b848..c1554aa07465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -338,6 +338,10 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2638,6 +2642,10 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 func_info_cnt; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2949,4 +2957,9 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_offset; + __u32 type_id; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 3cd640832894b85b5929d5bda74505452c800421 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 20 Nov 2018 20:41:05 +0000 Subject: MIPS: ptrace: introduce NT_MIPS_MSA regset The current methods for obtaining FP context via ptrace only provide either 32 or 64 bits per data register. With MSA, where vector registers are aliased with scalar FP data registers, those registers are 128 bits wide. Thus a new mechanism is required for userland to access those registers via ptrace. This patch introduces an NT_MIPS_MSA regset which provides, in this order: - The full 128 bits value of each vector register, in native endianness saved as though elements are doubles. 
That is, the format of each vector register is as would be obtained by saving it to memory using an st.d instruction. - The 32 bit scalar FP implementation register (FIR). - The 32 bit scalar FP control & status register (FCSR). - The 32 bit MSA implementation register (MSAIR). - The 32 bit MSA control & status register (MSACSR). The provision of the FIR & FCSR registers in addition to the MSA equivalents allows scalar FP context to be retrieved as a subset of the context available via this regset. Along with the MSA equivalents they also nicely form the final 128 bit "register" of the regset. Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21180/ Cc: linux-mips@linux-mips.org --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c5358e0ae7c5..d1b093f931e3 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -424,6 +424,7 @@ typedef struct elf64_shdr { #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ +#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ /* Note header in a PT_NOTE section */ typedef struct elf32_note { -- cgit v1.2.3 From 610c0c2b2813c36dc16838bbdbba4c29f8680dde Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 30 Oct 2018 07:32:05 +0100 Subject: virtio-gpu: add VIRTIO_GPU_F_EDID feature The feature allows the guest request an EDID blob (describing monitor capabilities) for a given scanout (aka virtual monitor connector). It brings a new command message, which has just a scanout field (beside the standard virtio-gpu header) and a response message which carries the EDID data. 
Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Link: http://patchwork.freedesktop.org/patch/msgid/20181030063206.19528-2-kraxel@redhat.com --- include/uapi/linux/virtio_gpu.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index f43c3c6171ff..8e88eba1fa7a 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -41,6 +41,7 @@ #include #define VIRTIO_GPU_F_VIRGL 0 +#define VIRTIO_GPU_F_EDID 1 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -56,6 +57,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING, VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, + VIRTIO_GPU_CMD_GET_EDID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -76,6 +78,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_DISPLAY_INFO, VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, + VIRTIO_GPU_RESP_OK_EDID, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -291,6 +294,21 @@ struct virtio_gpu_resp_capset { __u8 capset_data[]; }; +/* VIRTIO_GPU_CMD_GET_EDID */ +struct virtio_gpu_cmd_get_edid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 scanout; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_EDID */ +struct virtio_gpu_resp_edid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 size; + __le32 padding; + __u8 edid[1024]; +}; + #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) struct virtio_gpu_config { -- cgit v1.2.3 From f11216b24219ab26d8d159fbfa12dff886b16e32 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Thu, 22 Nov 2018 14:39:16 -0500 Subject: bpf: add skb->tstamp r/w access from tc clsact and cg skb progs This could be used to rate limit egress traffic in concert with a qdisc which supports Earliest Departure Time, such as FQ. Write access from cg skb progs only with CAP_SYS_ADMIN, since the value will be used by downstream qdiscs. It might make sense to relax this. 
Changes v1 -> v2: - allow access from cg skb, write only with CAP_SYS_ADMIN Signed-off-by: Vlad Dumitrescu Acked-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { -- cgit v1.2.3 From d644cca50f366cd109845ae92e37c09ed79adf81 Mon Sep 17 00:00:00 2001 From: John Sheu Date: Thu, 15 Nov 2018 10:57:16 -0500 Subject: media: vb2: Allow reqbufs(0) with "in use" MMAP buffers Videobuf2 presently does not allow VIDIOC_REQBUFS to destroy outstanding buffers if the queue is of type V4L2_MEMORY_MMAP, and if the buffers are considered "in use". This is different behavior than for other memory types and prevents us from deallocating buffers in following two cases: 1) There are outstanding mmap()ed views on the buffer. However even if we put the buffer in reqbufs(0), there will be remaining references, due to vma .open/close() adjusting vb2 buffer refcount appropriately. This means that the buffer will be in fact freed only when the last mmap()ed view is unmapped. 2) Buffer has been exported as a DMABUF. Refcount of the vb2 buffer is managed properly by VB2 DMABUF ops, i.e. incremented on DMABUF get and decremented on DMABUF release. This means that the buffer will be alive until all importers release it. Considering both cases above, there does not seem to be any need to prevent reqbufs(0) operation, because buffer lifetime is already properly managed by both mmap() and DMABUF code paths. Let's remove it and allow userspace freeing the queue (and potentially allocating a new one) even though old buffers might be still in processing. 
To let userspace know that the kernel now supports orphaning buffers that are still in use, add a new V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS to be set by reqbufs and create_bufs. [p.zabel@pengutronix.de: added V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS, updated documentation, and added back debug message] Signed-off-by: John Sheu Reviewed-by: Pawel Osciak Signed-off-by: Tomasz Figa Signed-off-by: Philipp Zabel Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil [hverkuil-cisco@xs4all.nl: added V4L2-BUF-CAP-SUPPORTS-ORPHANED-BUFS ref] Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c8e8ff810190..2a223835214c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -879,6 +879,7 @@ struct v4l2_requestbuffers { #define V4L2_BUF_CAP_SUPPORTS_USERPTR (1 << 1) #define V4L2_BUF_CAP_SUPPORTS_DMABUF (1 << 2) #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3) +#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4) /** * struct v4l2_plane - plane info for multi-planar buffers -- cgit v1.2.3 From 89a9157e1253bb3384a7596cb51bf1ceeac063ed Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:18 +0800 Subject: virtio: add packed ring types and macros Add types and macros for packed ring. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- include/uapi/linux/virtio_config.h | 3 +++ include/uapi/linux/virtio_ring.h | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 449132c76b1c..1196e1c1d4f6 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -75,6 +75,9 @@ */ #define VIRTIO_F_IOMMU_PLATFORM 33 +/* This feature indicates support for the packed virtqueue layout. 
*/ +#define VIRTIO_F_RING_PACKED 34 + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 6d5d5faa989b..2414f8af26b3 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -44,6 +44,13 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -53,6 +60,23 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 @@ -171,4 +195,32 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __le16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __le16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __le64 addr; + /* Buffer Length. */ + __le32 len; + /* Buffer ID. 
*/ + __le16 id; + /* The flags depending on descriptor type. */ + __le16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _UAPI_LINUX_VIRTIO_RING_H */ -- cgit v1.2.3 From a428afe82f98d2ffb31c981671630df1fa25906f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 24 Nov 2018 04:34:20 +0200 Subject: net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. 
[1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 18 ++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e41eda3c71f1..6dc02c03bdf8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -292,4 +292,22 @@ struct br_mcast_stats { __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; __u64 mcast_packets[BR_MCAST_DIR_SIZE]; }; + +/* bridge boolean options + * IMPORTANT: if adding a new option do not forget to handle + * it in br_boolopt_toggle/get and bridge sysfs + */ +enum br_boolopt_id { + BR_BOOLOPT_MAX +}; + +/* struct br_boolopt_multi - change multiple bridge boolean options + * + * @optval: new option values (bit per option) + * @optmask: options to change (bit per option) + */ +struct br_boolopt_multi { + __u32 optval; + __u32 optmask; +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f42c069d81db..d6533828123a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -288,6 +288,7 @@ enum { IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, + IFLA_BR_MULTI_BOOLOPT, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 70e4272b4c81828e7d942209bae83b9d92752cfe Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 24 Nov 2018 04:34:21 +0200 Subject: net: bridge: add no_linklocal_learn bool option Use the new boolopt API to add an option which disables learning from link-local packets. The default is kept as before and learning is enabled. This is a simple map from a boolopt bit to a bridge private flag that is tested before learning. v2: pass NULL for extack via sysfs Signed-off-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6dc02c03bdf8..773e476a8e54 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -294,10 +294,13 @@ struct br_mcast_stats { }; /* bridge boolean options + * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { + BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From cff478b9d9ccaee0de0e02700c63addf007b5d3c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Nov 2018 15:42:04 +0100 Subject: netns: add support of NETNSA_TARGET_NSID Like it was done for link and address, add the ability to perform get/dump in another netns by specifying a target nsid attribute. Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h index 0187c74d8889..0ed9dd61d32a 100644 --- a/include/uapi/linux/net_namespace.h +++ b/include/uapi/linux/net_namespace.h @@ -16,6 +16,7 @@ enum { NETNSA_NSID, NETNSA_PID, NETNSA_FD, + NETNSA_TARGET_NSID, __NETNSA_MAX, }; -- cgit v1.2.3 From 288f06a001eb6265122c620295b68a0dd53d1482 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Nov 2018 15:42:06 +0100 Subject: netns: enable to dump full nsid translation table Like the previous patch, the goal is to ease to convert nsids from one netns to another netns. A new attribute (NETNSA_CURRENT_NSID) is added to the kernel answer when NETNSA_TARGET_NSID is provided, thus the user can easily convert nsids. Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h index 0ed9dd61d32a..9f9956809565 100644 --- a/include/uapi/linux/net_namespace.h +++ b/include/uapi/linux/net_namespace.h @@ -17,6 +17,7 @@ enum { NETNSA_PID, NETNSA_FD, NETNSA_TARGET_NSID, + NETNSA_CURRENT_NSID, __NETNSA_MAX, }; -- cgit v1.2.3 From 7246d8ed4dcce23f7509949a77be15fa9f0e3d28 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 26 Nov 2018 14:16:17 -0800 Subject: bpf: helper to pop data from messages This adds a BPF SK_MSG program helper so that we can pop data from a msg. We use this to pop metadata from a previous push data call. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 23e2031a43d4..597afdbc1ab9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2268,6 +2268,19 @@ union bpf_attr { * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of msg + * payload and/or *pop* value being too large. + * + * Return + * 0 on success, or a negative error in case of failure.
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2373,8 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 4f693b55c3d2d2239b8a0094b518a1e533cf75d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Nov 2018 14:42:03 -0800 Subject: tcp: implement coalescing on backlog queue In case GRO is not as efficient as it should be or disabled, we might have a user thread trapped in __release_sock() while softirq handler flood packets up to the point we have to drop. This patch balances work done from user thread and softirq, to give more chances to __release_sock() to complete its work before new packets are added to the backlog. This also helps if we receive many ACK packets, since GRO does not aggregate them. This patch brings ~60% throughput increase on a receiver without GRO, but the spectacular gain is really on 1000x release_sock() latency reduction I have measured. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S.
Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index f80135e5feaa..86dc24a96c90 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -243,6 +243,7 @@ enum LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */ LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */ + LINUX_MIB_TCPBACKLOGCOALESCE, /* TCPBacklogCoalesce */ LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */ LINUX_MIB_TCPOFODROP, /* TCPOFODrop */ LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ -- cgit v1.2.3 From 26d31925cd5ea4b5b168ed538b0326d63ccbb384 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 28 Nov 2018 19:12:56 +0100 Subject: tun: implement carrier change The userspace may need to control the carrier state. Signed-off-by: Nicolas Dichtel Signed-off-by: Didier Pallard Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index ee432cd3018c..23a6753b37df 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -59,6 +59,7 @@ #define TUNGETVNETBE _IOR('T', 223, int) #define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETFILTEREBPF _IOR('T', 225, int) +#define TUNSETCARRIER _IOW('T', 226, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From e9ee9efc0d176512cdce9d27ff8549d7ffa2bfcd Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:14 -0800 Subject: bpf: Add BPF_F_ANY_ALIGNMENT. Often we want to write tests cases that check things like bad context offset accesses. And one way to do this is to use an odd offset on, for example, a 32-bit load. This unfortunately triggers the alignment checks first on platforms that do not set CONFIG_EFFICIENT_UNALIGNED_ACCESS. 
So the test case sees the alignment failure rather than what it was testing for. It is often not completely possible to respect the original intention of the test, or even test the same exact thing, while solving the alignment issue. Another option could have been to check the alignment after the context and other validations are performed by the verifier, but that is a non-trivial change to the verifier. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 597afdbc1ab9..8050caea7495 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -232,6 +232,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads and stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 -- cgit v1.2.3 From c3e9305983597a61083482581e83f0bd77ba306a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 12 Nov 2018 16:26:44 +0100 Subject: netfilter: remove NFC_* cache bits These are very very (for long time unused) caching infrastructure definitions, remove them. They have nothing to do with the NFC subsystem.
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter.h | 4 ---- include/uapi/linux/netfilter_decnet.h | 10 ---------- include/uapi/linux/netfilter_ipv4.h | 28 ---------------------------- include/uapi/linux/netfilter_ipv6.h | 29 ----------------------------- 4 files changed, 71 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index cca10e767cd8..ca9e63d6e0e4 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -34,10 +34,6 @@ /* only for userspace compatibility */ #ifndef __KERNEL__ -/* Generic cache responses from hook functions. - <= 0x2000 is used for protocol-flags. */ -#define NFC_UNKNOWN 0x4000 -#define NFC_ALTERED 0x8000 /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ #define NF_VERDICT_BITS 16 diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h index 61f1c7dfd033..3c77f54560f2 100644 --- a/include/uapi/linux/netfilter_decnet.h +++ b/include/uapi/linux/netfilter_decnet.h @@ -15,16 +15,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_DN_SRC 0x0001 -/* Dest IP address. */ -#define NFC_DN_DST 0x0002 -/* Input device. */ -#define NFC_DN_IF_IN 0x0004 -/* Output device. */ -#define NFC_DN_IF_OUT 0x0008 - /* kernel define is in netfilter_defs.h */ #define NF_DN_NUMHOOKS 7 #endif /* ! __KERNEL__ */ diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index c3b060775e13..155e77d6a42d 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -13,34 +13,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP_DST 0x0002 -/* Input device. */ -#define NFC_IP_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP_IF_OUT 0x0008 -/* TOS. 
*/ -#define NFC_IP_TOS 0x0010 -/* Protocol. */ -#define NFC_IP_PROTO 0x0020 -/* IP options. */ -#define NFC_IP_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP_FRAG 0x0080 - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP_PROTO_UNKNOWN 0x2000 - /* IP Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP_PRE_ROUTING 0 diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index dc624fd24d25..80aa9b0799af 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -16,35 +16,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP6_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP6_DST 0x0002 -/* Input device. */ -#define NFC_IP6_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP6_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP6_TOS 0x0010 -/* Protocol. */ -#define NFC_IP6_PROTO 0x0020 -/* IP options. */ -#define NFC_IP6_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP6_FRAG 0x0080 - - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP6_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP6_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP6_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP6_PROTO_UNKNOWN 0x2000 - /* IP6 Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP6_PRE_ROUTING 0 -- cgit v1.2.3 From 92799ef7209bfd4c8eadb88c2c8f6fcba544b367 Mon Sep 17 00:00:00 2001 From: Sergey Dorodnicov Date: Wed, 12 Sep 2018 02:42:06 -0400 Subject: media: v4l: Add 4bpp packed depth confidence format CNF4 Adding new fourcc CNF4 for 4 bit-per-pixel packed depth confidence information provided by Intel RealSense cameras. 
Every two consecutive pixels are packed into a single byte. Signed-off-by: Sergey Dorodnicov Signed-off-by: Evgeni Raikhel Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 2a223835214c..2db1635de956 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -689,6 +689,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ #define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ #define V4L2_PIX_FMT_SUNXI_TILED_NV12 v4l2_fourcc('S', 'T', '1', '2') /* Sunxi Tiled NV12 Format */ +#define V4L2_PIX_FMT_CNF4 v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */ /* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */ #define V4L2_PIX_FMT_IPU3_SBGGR10 v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */ -- cgit v1.2.3 From e3da08d057002f9d0831949d51666c3e15dc6b29 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Sun, 2 Dec 2018 20:18:19 -0500 Subject: bpf: allow BPF read access to qdisc pkt_len The pkt_len field in qdisc_skb_cb stores the skb length as it will appear on the wire after segmentation. For byte accounting, this value is more accurate than skb->len. It is computed on entry to the TC layer, so only valid there. Allow read access to this field from BPF tc classifier and action programs. The implementation is analogous to tc_classid, aside from restricting to read access. To distinguish it from skb->len and self-describe export as wire_len. 
Changes v1->v2 - Rename pkt_len to wire_len Signed-off-by: Petar Penkov Signed-off-by: Vlad Dumitrescu Signed-off-by: Willem de Bruijn Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8050caea7495..0183b8e70a9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2497,6 +2497,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 90b1023f68c78b9b85e364250155776c8e421176 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 3 Dec 2018 12:13:35 +0000 Subject: bpf: fix documentation for eBPF helpers The missing indentation on the "Return" sections for bpf_map_pop_elem() and bpf_map_peek_elem() helpers break RST and man pages generation. This patch fixes them, and moves the description of those two helpers towards the end of the list (even though they are somehow related to the three first helpers for maps, the man page explicitly states that the helpers are sorted in chronological order). While at it, bring other minor formatting edits for eBPF helpers documentation: mostly blank lines removal, RST formatting, or other small nits for consistency. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 90 ++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0183b8e70a9e..572eb2d42768 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -496,18 +496,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. 
- * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1931,9 +1919,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1942,9 +1930,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2089,8 +2077,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. 
@@ -2173,21 +2161,22 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * @@ -2195,7 +2184,7 @@ union bpf_attr { * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2221,15 +2210,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. 
+ * A pointer to *struct bpf_sock*, or **NULL** in case of failure. + * For sockets with reuseport option, **struct bpf_sock** + * return is from **reuse->socks**\ [] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2255,46 +2244,57 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * A pointer to **struct bpf_sock**, or **NULL** in case of + * failure. For sockets with reuseport option, **struct bpf_sock** + * return is from **reuse->socks**\ [] using hash of the packet. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. 
+ * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. - * * Return * 0 on success, or a negative error in case of failure. * * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) - * Description + * Description * Will remove *pop* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation * if possible. Other errors can occur if input parameters are - * invalid either due to *start* byte not being valid part of msg + * invalid either due to *start* byte not being valid part of *msg* * payload and/or *pop* value being to large. - * * Return - * 0 on success, or a negative erro in case of failure. + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3 From 846e980a87fc30075517d6d979548294d5461bdb Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Mon, 3 Dec 2018 07:58:59 +0000 Subject: devlink: Add 'fw_load_policy' generic parameter Many drivers load the device's firmware image during the initialization flow either from the flash or from the disk. 
Currently this option is not controlled by the user and the driver decides from where to load the firmware image. 'fw_load_policy' gives the ability to control this option which allows the user to choose between different loading policies supported by the driver. This parameter can be useful while testing and/or debugging the device. For example, testing a firmware bug fix. Signed-off-by: Shalom Toledo Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 79407bbd296d..6e52d3660654 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -163,6 +163,11 @@ enum devlink_param_cmode { DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1 }; +enum devlink_param_fw_load_policy_value { + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER, + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, -- cgit v1.2.3 From b5a36b1e1b138285ea0df34bf96c759e1e30fafd Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 3 Dec 2018 11:31:23 +0000 Subject: bpf: respect size hint to BPF_PROG_TEST_RUN if present Use data_size_out as a size hint when copying test output to user space. ENOSPC is returned if the output buffer is too small. Callers which so far did not set data_size_out are not affected. 
Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 572eb2d42768..c8e1eeee2c5f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -374,8 +374,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; -- cgit v1.2.3 From cc1068eb6ad21a6cf54aa5f9ae25bf50fd5c9d4b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Nov 2018 15:25:04 -0800 Subject: uapi/nl80211: fix spelling errors Spelling errors found by codespell Signed-off-by: Stephen Hemminger Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 51bd85b7d839..2b53c0e949c7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1734,7 +1734,7 @@ enum nl80211_commands { * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID * is included in the probe request, but the match attributes * will never let it go through), -EINVAL may be returned. - * If ommited, no filtering is done. + * If omitted, no filtering is done. * * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. 
In each nested item, it contains attributes @@ -1839,7 +1839,7 @@ enum nl80211_commands { * * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be * used by the drivers which has MLME in firmware and does not have support - * to report per station tx/rx activity to free up the staion entry from + * to report per station tx/rx activity to free up the station entry from * the list. This needs to be used when the driver advertises the * capability to timeout the stations. * @@ -2200,7 +2200,7 @@ enum nl80211_commands { * * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in * the specified band is to be adjusted before doing - * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparision to figure out + * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out * better BSSs. The attribute value is a packed structure * value as specified by &struct nl80211_bss_select_rssi_adjust. * @@ -4910,7 +4910,7 @@ enum nl80211_iface_limit_attrs { * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 * => allows a STA plus three P2P interfaces * - * The list of these four possiblities could completely be contained + * The list of these four possibilities could completely be contained * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate * that any of these groups must match. 
* @@ -4940,7 +4940,7 @@ enum nl80211_if_combination_attrs { * enum nl80211_plink_state - state of a mesh peer link finite state machine * * @NL80211_PLINK_LISTEN: initial state, considered the implicit - * state of non existant mesh peer links + * state of non existent mesh peer links * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to * this mesh peer * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received @@ -5432,7 +5432,7 @@ enum nl80211_timeout_reason { * request parameters IE in the probe request * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at - * rate of at least 5.5M. In case non OCE AP is dicovered in the channel, + * rate of at least 5.5M. In case non OCE AP is discovered in the channel, * only the first probe req in the channel will be sent in high rate. * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request * tx deferral (dot11FILSProbeDelay shall be set to 15ms) -- cgit v1.2.3 From d30d42e08c76cb9323ec6121190eb026b07f773b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 5 Dec 2018 17:35:44 -0800 Subject: bpf: Change insn_offset to insn_off in bpf_func_info The later patch will introduce "struct bpf_line_info" which has member "line_off" and "file_off" referring back to the string section in btf. The line_"off" and file_"off" are more consistent to the naming convention in btf.h that means "offset" (e.g. name_off in "struct btf_type"). The to-be-added "struct bpf_line_info" also has another member, "insn_off" which is the same as the "insn_offset" in "struct bpf_func_info". Hence, this patch renames "insn_offset" to "insn_off" for "struct bpf_func_info". 
Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8e1eeee2c5f..a84fd232d934 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2991,7 +2991,7 @@ struct bpf_flow_keys { }; struct bpf_func_info { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; -- cgit v1.2.3 From 6e8e72cd206e2ba68801e4f2490f639d41808c8d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:18 +0000 Subject: crypto: user - convert all stats from u32 to u64 All the 32-bit fields need to be 64-bit. In some cases, UINT32_MAX crypto operations can be done in seconds. Reported-by: Eric Biggers Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 6dafbc3e4414..9f8187077ce4 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -79,11 +79,11 @@ struct crypto_user_alg { struct crypto_stat { char type[CRYPTO_MAX_NAME]; union { - __u32 stat_encrypt_cnt; - __u32 stat_compress_cnt; - __u32 stat_generate_cnt; - __u32 stat_hash_cnt; - __u32 stat_setsecret_cnt; + __u64 stat_encrypt_cnt; + __u64 stat_compress_cnt; + __u64 stat_generate_cnt; + __u64 stat_hash_cnt; + __u64 stat_setsecret_cnt; }; union { __u64 stat_encrypt_tlen; @@ -92,29 +92,29 @@ struct crypto_stat { __u64 stat_hash_tlen; }; union { - __u32 stat_akcipher_err_cnt; - __u32 stat_cipher_err_cnt; - __u32 stat_compress_err_cnt; - __u32 stat_aead_err_cnt; - __u32 stat_hash_err_cnt; - __u32 stat_rng_err_cnt; - __u32 stat_kpp_err_cnt; + __u64 stat_akcipher_err_cnt; + __u64 stat_cipher_err_cnt; + __u64 
stat_compress_err_cnt; + __u64 stat_aead_err_cnt; + __u64 stat_hash_err_cnt; + __u64 stat_rng_err_cnt; + __u64 stat_kpp_err_cnt; }; union { - __u32 stat_decrypt_cnt; - __u32 stat_decompress_cnt; - __u32 stat_seed_cnt; - __u32 stat_generate_public_key_cnt; + __u64 stat_decrypt_cnt; + __u64 stat_decompress_cnt; + __u64 stat_seed_cnt; + __u64 stat_generate_public_key_cnt; }; union { __u64 stat_decrypt_tlen; __u64 stat_decompress_tlen; }; union { - __u32 stat_verify_cnt; - __u32 stat_compute_shared_secret_cnt; + __u64 stat_verify_cnt; + __u64 stat_compute_shared_secret_cnt; }; - __u32 stat_sign_cnt; + __u64 stat_sign_cnt; }; struct crypto_report_larval { -- cgit v1.2.3 From 7f0a9d5c9d1ba8ab3e5b144e52553744dc0d7471 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:19 +0000 Subject: crypto: user - split user space crypto stat structures It is cleaner to have each stat in their own structures. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 100 +++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 38 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 9f8187077ce4..3a70f025e27d 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -76,45 +76,69 @@ struct crypto_user_alg { __u32 cru_flags; }; -struct crypto_stat { - char type[CRYPTO_MAX_NAME]; - union { - __u64 stat_encrypt_cnt; - __u64 stat_compress_cnt; - __u64 stat_generate_cnt; - __u64 stat_hash_cnt; - __u64 stat_setsecret_cnt; - }; - union { - __u64 stat_encrypt_tlen; - __u64 stat_compress_tlen; - __u64 stat_generate_tlen; - __u64 stat_hash_tlen; - }; - union { - __u64 stat_akcipher_err_cnt; - __u64 stat_cipher_err_cnt; - __u64 stat_compress_err_cnt; - __u64 stat_aead_err_cnt; - __u64 stat_hash_err_cnt; - __u64 stat_rng_err_cnt; - __u64 stat_kpp_err_cnt; - }; - union { - __u64 stat_decrypt_cnt; - __u64 
stat_decompress_cnt; - __u64 stat_seed_cnt; - __u64 stat_generate_public_key_cnt; - }; - union { - __u64 stat_decrypt_tlen; - __u64 stat_decompress_tlen; - }; - union { - __u64 stat_verify_cnt; - __u64 stat_compute_shared_secret_cnt; - }; +struct crypto_stat_aead { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_aead_err_cnt; +}; + +struct crypto_stat_akcipher { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_verify_cnt; __u64 stat_sign_cnt; + __u64 stat_akcipher_err_cnt; +}; + +struct crypto_stat_cipher { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_cipher_err_cnt; +}; + +struct crypto_stat_compress { + char type[CRYPTO_MAX_NAME]; + __u64 stat_compress_cnt; + __u64 stat_compress_tlen; + __u64 stat_decompress_cnt; + __u64 stat_decompress_tlen; + __u64 stat_compress_err_cnt; +}; + +struct crypto_stat_hash { + char type[CRYPTO_MAX_NAME]; + __u64 stat_hash_cnt; + __u64 stat_hash_tlen; + __u64 stat_hash_err_cnt; +}; + +struct crypto_stat_kpp { + char type[CRYPTO_MAX_NAME]; + __u64 stat_setsecret_cnt; + __u64 stat_generate_public_key_cnt; + __u64 stat_compute_shared_secret_cnt; + __u64 stat_kpp_err_cnt; +}; + +struct crypto_stat_rng { + char type[CRYPTO_MAX_NAME]; + __u64 stat_generate_cnt; + __u64 stat_generate_tlen; + __u64 stat_seed_cnt; + __u64 stat_rng_err_cnt; +}; + +struct crypto_stat_larval { + char type[CRYPTO_MAX_NAME]; }; struct crypto_report_larval { -- cgit v1.2.3 From 44f13133cb03ec32fc88a533673248ef5c0617e3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:25 +0000 Subject: crypto: user - rename err_cnt parameter Since now all crypto stats are on their own structures, it is now useless to have the algorithm name in the err_cnt 
member. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 3a70f025e27d..4dc1603919ce 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -82,7 +82,7 @@ struct crypto_stat_aead { __u64 stat_encrypt_tlen; __u64 stat_decrypt_cnt; __u64 stat_decrypt_tlen; - __u64 stat_aead_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_akcipher { @@ -93,7 +93,7 @@ struct crypto_stat_akcipher { __u64 stat_decrypt_tlen; __u64 stat_verify_cnt; __u64 stat_sign_cnt; - __u64 stat_akcipher_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_cipher { @@ -102,7 +102,7 @@ struct crypto_stat_cipher { __u64 stat_encrypt_tlen; __u64 stat_decrypt_cnt; __u64 stat_decrypt_tlen; - __u64 stat_cipher_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_compress { @@ -111,14 +111,14 @@ struct crypto_stat_compress { __u64 stat_compress_tlen; __u64 stat_decompress_cnt; __u64 stat_decompress_tlen; - __u64 stat_compress_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_hash { char type[CRYPTO_MAX_NAME]; __u64 stat_hash_cnt; __u64 stat_hash_tlen; - __u64 stat_hash_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_kpp { @@ -126,7 +126,7 @@ struct crypto_stat_kpp { __u64 stat_setsecret_cnt; __u64 stat_generate_public_key_cnt; __u64 stat_compute_shared_secret_cnt; - __u64 stat_kpp_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_rng { @@ -134,7 +134,7 @@ struct crypto_stat_rng { __u64 stat_generate_cnt; __u64 stat_generate_tlen; __u64 stat_seed_cnt; - __u64 stat_rng_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_larval { -- cgit v1.2.3 From 52ea899637c746984d657b508da6e3f2686adfca Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:21 +1000 Subject: Input: add `REL_WHEEL_HI_RES` and `REL_HWHEEL_HI_RES` This 
event code represents scroll reports from high-resolution wheels and is modelled after the approach Windows uses. The value 120 is one detent (wheel click) of movement. Mice with higher-resolution scrolling can send fractions of 120 which must be accumulated in userspace. Userspace can either wait for a full 120 to accumulate or scroll by fractions of one logical scroll movement as the events come in. 120 was picked as magic number because it has a high number of integer fractions that can be used by high-resolution wheels. For more information see https://docs.microsoft.com/en-us/previous-versions/windows/hardware/design/dn613912(v=vs.85) These new axes obsolete REL_WHEEL and REL_HWHEEL. The legacy axes are emulated by the kernel but the most accurate (and most granular) data is available through the new axes. Signed-off-by: Peter Hutterer Acked-by: Dmitry Torokhov Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- include/uapi/linux/input-event-codes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 3eb5a4c3d60a..265ef2028660 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -716,6 +716,8 @@ * the situation described above. */ #define REL_RESERVED 0x0a +#define REL_WHEEL_HI_RES 0x0b +#define REL_HWHEEL_HI_RES 0x0c #define REL_MAX 0x0f #define REL_CNT (REL_MAX+1) -- cgit v1.2.3 From 1dde0ea95b782425b95455d487cb44991525a1d1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 20 Nov 2018 21:00:29 -0500 Subject: drm/amdkfd: Add DMABuf import functionality This is used for interoperability between ROCm compute and graphics APIs. It allows importing graphics driver BOs into the ROCm SVM address space for zero-copy GPU access. The API is split into two steps (query and import) to allow user mode to manage the virtual address space allocation for the imported buffer. 
Acked-by: Alex Deucher Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b01eb502d49c..e622fd1fbd46 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -398,6 +398,24 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +struct kfd_ioctl_get_dmabuf_info_args { + __u64 size; /* from KFD */ + __u64 metadata_ptr; /* to KFD */ + __u32 metadata_size; /* to KFD (space allocated by user) + * from KFD (actual metadata size) + */ + __u32 gpu_id; /* from KFD */ + __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ + __u32 dmabuf_fd; /* to KFD */ +}; + +struct kfd_ioctl_import_dmabuf_args { + __u64 va_addr; /* to KFD */ + __u64 handle; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 dmabuf_fd; /* to KFD */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -486,7 +504,13 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) +#define AMDKFD_IOC_GET_DMABUF_INFO \ + AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) + +#define AMDKFD_IOC_IMPORT_DMABUF \ + AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1C +#define AMDKFD_COMMAND_END 0x1E #endif -- cgit v1.2.3 From c454a46b5efd8eff8880e88ece2976e60a26bf35 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:25 -0800 Subject: bpf: Add bpf_line_info support This patch adds bpf_line_info support. It accepts an array of bpf_line_info objects during BPF_PROG_LOAD. 
The "line_info", "line_info_cnt" and "line_info_rec_size" are added to the "union bpf_attr". The "line_info_rec_size" makes bpf_line_info extensible in the future. The new "check_btf_line()" ensures the userspace line_info is valid for the kernel to use. When the verifier is translating/patching the bpf_prog (through "bpf_patch_insn_single()"), the line_infos' insn_off is also adjusted by the newly added "bpf_adj_linfo()". If the bpf_prog is jited, this patch also provides the jited addrs (in aux->jited_linfo) for the corresponding line_info.insn_off. "bpf_prog_fill_jited_linfo()" is added to fill the aux->jited_linfo. It is currently called by the x86 jit. Other jits can also use "bpf_prog_fill_jited_linfo()" and it will be done in the followup patches. In the future, if it is deemed necessary, a particular jit could also provide its own "bpf_prog_fill_jited_linfo()" implementation. A few "*line_info*" fields are added to the bpf_prog_info such that the user can get the xlated line_info back (i.e. the line_info with its insn_off reflecting the translated prog). The jited_line_info is available if the prog is jited. It is an array of __u64. If the prog is not jited, jited_line_info_cnt is 0. The verifier's verbose log with line_info will be done in a follow up patch.
Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a84fd232d934..7a66db8d15d5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -356,6 +356,9 @@ union bpf_attr { __u32 func_info_rec_size; /* userspace bpf_func_info size */ __aligned_u64 func_info; /* func info */ __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2679,6 +2682,12 @@ struct bpf_prog_info { __u32 func_info_rec_size; __aligned_u64 func_info; __u32 func_info_cnt; + __u32 line_info_cnt; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 jited_line_info_cnt; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2995,4 +3004,14 @@ struct bpf_func_info { __u32 type_id; }; +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 01d3240a04f4c09392e13c77b54d4423ebce2d72 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 6 Dec 2018 13:01:03 +0000 Subject: media: bpf: add bpf function to report mouse movement Some IR remotes have a directional pad or other pointer-like thing that can be used as a mouse. Make it possible to decode these types of IR protocols in BPF. 
Cc: netdev@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a66db8d15d5..1bee1135866a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2301,6 +2301,20 @@ union bpf_attr { * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2394,7 +2408,8 @@ union bpf_attr { FN(map_pop_elem), \ FN(map_peek_elem), \ FN(msg_push_data), \ - FN(msg_pop_data), + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 11d8b82d2222cade12caad2c125f23023777dcbc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 10 Dec 2018 14:14:08 -0800 Subject: bpf: rename *_info_cnt to nr_*_info in bpf_prog_info In uapi bpf.h, currently we have the following fields in the struct bpf_prog_info: __u32 func_info_cnt; __u32 line_info_cnt; __u32 jited_line_info_cnt; The above field names "func_info_cnt" and "line_info_cnt" also appear in union bpf_attr for program loading. The original intention is to keep the names the same between bpf_prog_info and bpf_attr so it will imply what we returned to user space will be the same as what the user space passed to the kernel. 
Such a naming convention in bpf_prog_info is not consistent with other fields like: __u32 nr_jited_ksyms; __u32 nr_jited_func_lens; This patch made this adjustment so in bpf_prog_info newly introduced *_info_cnt becomes nr_*_info. Acked-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bee1135866a..f943ed803309 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2696,11 +2696,11 @@ struct bpf_prog_info { __u32 btf_id; __u32 func_info_rec_size; __aligned_u64 func_info; - __u32 func_info_cnt; - __u32 line_info_cnt; + __u32 nr_func_info; + __u32 nr_line_info; __aligned_u64 line_info; __aligned_u64 jited_line_info; - __u32 jited_line_info_cnt; + __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); -- cgit v1.2.3 From 0bd72117fba2dd51a65eaa7b480adc0eea9a4409 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Dec 2018 10:26:33 +0100 Subject: bpf: fix up uapi helper description and sync bpf header with tools Minor markup fixup from bpf-next into net-next merge in the BPF helper description of bpf_sk_lookup_tcp() and bpf_sk_lookup_udp(). Also sync up the copy of bpf.h from tooling infrastructure. Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 92e962ba0c47..aa582cd5bfcf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2218,9 +2218,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. 
* Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2254,9 +2254,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sock) * Description -- cgit v1.2.3 From 6a21cc50f0c7f87dae5259f6cfefe024412313f6 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Sun, 9 Dec 2018 11:24:13 -0700 Subject: seccomp: add a return code to trap to userspace This patch introduces a means for syscalls matched in seccomp to notify some other task that a particular filter has been triggered. The motivation for this is primarily for use with containers. For example, if a container does an init_module(), we obviously don't want to load this untrusted code, which may be compiled for the wrong version of the kernel anyway. Instead, we could parse the module image, figure out which module the container is trying to load and load it on the host. As another example, containers cannot mount() in general since various filesystems assume a trusted image. However, if an orchestrator knows that e.g. 
a particular block device has not been exposed to a container for writing, it may want to allow the container to mount that block device (that is, handle the mount for it). This patch adds functionality that is already possible via at least two other means that I know about, both of which involve ptrace(): first, one could ptrace attach, and then iterate through syscalls via PTRACE_SYSCALL. Unfortunately this is slow, so a faster version would be to install a filter that does SECCOMP_RET_TRACE, which triggers a PTRACE_EVENT_SECCOMP. Since ptrace allows only one tracer, if the container runtime is that tracer, users inside the container (or outside) trying to debug it will not be able to use ptrace, which is annoying. It also means that older distributions based on Upstart cannot boot inside containers using ptrace, since upstart itself uses ptrace to monitor services while starting. The actual implementation of this is fairly small, although getting the synchronization right was/is slightly complex. Finally, it's worth noting that the classic seccomp TOCTOU of reading memory data from the task still applies here, but can be avoided with careful design of the userspace handler: if the userspace handler reads all of the task memory that is necessary before applying its security policy, the tracee's subsequent memory edits will not be read by the tracer. Signed-off-by: Tycho Andersen CC: Kees Cook CC: Andy Lutomirski CC: Oleg Nesterov CC: Eric W. Biederman CC: "Serge E. 
Hallyn" Acked-by: Serge Hallyn CC: Christian Brauner CC: Tyler Hicks CC: Akihiro Suda Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 9efc0e73d50b..90734aa5aa36 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -15,11 +15,13 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 #define SECCOMP_GET_ACTION_AVAIL 2 +#define SECCOMP_GET_NOTIF_SIZES 3 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) -#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) -#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) /* * All BPF programs must return a 32-bit value. @@ -35,6 +37,7 @@ #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ @@ -60,4 +63,35 @@ struct seccomp_data { __u64 args[6]; }; +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +#define SECCOMP_IOC_MAGIC '!' 
+#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) #endif /* _UAPI_LINUX_SECCOMP_H */ -- cgit v1.2.3 From c872bdb38febb4c31ece3599c52cf1f833b89f4e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Dec 2018 09:37:46 -0800 Subject: bpf: include sub program tags in bpf_prog_info Changes v2 -> v3: 1. remove check for bpf_dump_raw_ok(). Changes v1 -> v2: 1. Fix error path as Martin suggested. This patch adds nr_prog_tags and prog_tags to bpf_prog_info. This is a reliable way for user space to get tags of all sub programs. Before this patch, user space need to find sub program tags via kallsyms. This feature will be used in BPF introspection, where user space queries information about BPF programs via sys_bpf. 
Signed-off-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa582cd5bfcf..e7d57e89f25f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2717,6 +2717,8 @@ struct bpf_prog_info { __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From ec6e822d1a22d0eef1d1fa260dff751dba9a4258 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 7 Dec 2018 18:39:26 +0000 Subject: arm64: expose user PAC bit positions via ptrace When pointer authentication is in use, data/instruction pointers have a number of PAC bits inserted into them. The number and position of these bits depends on the configured TCR_ELx.TxSZ and whether tagging is enabled. ARMv8.3 allows tagging to differ for instruction and data pointers. For userspace debuggers to unwind the stack and/or to follow pointer chains, they need to be able to remove the PAC bits before attempting to use a pointer. This patch adds a new structure with masks describing the location of the PAC bits in userspace instruction and data pointers (i.e. those addressable via TTBR0), which userspace can query via PTRACE_GETREGSET. By clearing these bits from pointers (and replacing them with the value of bit 55), userspace can acquire the PAC-less versions. This new regset is exposed when the kernel is built with (user) pointer authentication support, and the address authentication feature is enabled. Otherwise, the regset is hidden. 
Reviewed-by: Richard Henderson Signed-off-by: Mark Rutland Signed-off-by: Kristina Martsenko Cc: Catalin Marinas Cc: Ramana Radhakrishnan Cc: Will Deacon [will: Fix to use vabits_user instead of VA_BITS and rename macro] Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c5358e0ae7c5..3f23273d690c 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -420,6 +420,7 @@ typedef struct elf64_shdr { #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ +#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From ba830885656414101b2f8ca88786524d4bb5e8c1 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 7 Dec 2018 18:39:28 +0000 Subject: arm64: add prctl control for resetting ptrauth keys Add an arm64-specific prctl to allow a thread to reinitialize its pointer authentication keys to random values. This can be useful when exec() is not used for starting new processes, to ensure that different processes still have different keys. 
Signed-off-by: Kristina Martsenko Signed-off-by: Will Deacon --- include/uapi/linux/prctl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index c0d7ea0bf5b6..0f535a501391 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -219,4 +219,12 @@ struct prctl_mm_map { # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +/* Reset arm64 pointer authentication keys */ +#define PR_PAC_RESET_KEYS 54 +# define PR_PAC_APIAKEY (1UL << 0) +# define PR_PAC_APIBKEY (1UL << 1) +# define PR_PAC_APDAKEY (1UL << 2) +# define PR_PAC_APDBKEY (1UL << 3) +# define PR_PAC_APGAKEY (1UL << 4) + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 72148d1a57e7c76745e68c94ad5d235240d26ac8 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 16 Jun 2017 08:38:31 -0400 Subject: media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT The V4L2_BUF_TYPE_META_OUTPUT mirrors the V4L2_BUF_TYPE_META_CAPTURE with the exception that it is an OUTPUT type. The use case for this is to pass buffers to the device that are not image data but metadata. The formats, just as the metadata capture formats, are typically device specific and highly structured. 
Signed-off-by: Sakari Ailus Acked-by: Hans Verkuil Reviewed-by: Tomasz Figa Tested-by: Tian Shu Qiu Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 2db1635de956..a9d47b1b9437 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -145,6 +145,7 @@ enum v4l2_buf_type { V4L2_BUF_TYPE_SDR_CAPTURE = 11, V4L2_BUF_TYPE_SDR_OUTPUT = 12, V4L2_BUF_TYPE_META_CAPTURE = 13, + V4L2_BUF_TYPE_META_OUTPUT = 14, /* Deprecated, do not use */ V4L2_BUF_TYPE_PRIVATE = 0x80, }; @@ -469,6 +470,7 @@ struct v4l2_capability { #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ #define V4L2_CAP_ASYNCIO 0x02000000 /* async I/O */ #define V4L2_CAP_STREAMING 0x04000000 /* streaming I/O ioctls */ +#define V4L2_CAP_META_OUTPUT 0x08000000 /* Is a metadata output device */ #define V4L2_CAP_TOUCH 0x10000000 /* Is a touch device */ -- cgit v1.2.3 From 2a31b9db153530df4aa02dac8c32837bf5f47019 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2018 02:36:47 +0200 Subject: kvm: introduce manual dirty log reprotect There are two problems with KVM_GET_DIRTY_LOG. First, and less important, it can take kvm->mmu_lock for an extended period of time. Second, its user can actually see many false positives in some cases. The latter is due to a benign race like this: 1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects them. 2. The guest modifies the pages, causing them to be marked dirty. 3. Userspace actually copies the pages. 4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though they were not written to since (3). This is especially a problem for large guests, where the time between (1) and (3) can be substantial. This patch introduces a new capability which, when enabled, makes KVM_GET_DIRTY_LOG not write-protect the pages it returns. 
Instead, userspace has to explicitly clear the dirty log bits just before using the content of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a 64-page granularity rather than requiring to sync a full memslot; this way, the mmu_lock is taken for small amounts of time, and only a small amount of time will pass between write protection of pages and the sending of their content. Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2b7a652c9fa4..9fe35f1ac938 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -492,6 +492,17 @@ struct kvm_dirty_log { }; }; +/* for KVM_CLEAR_DIRTY_LOG */ +struct kvm_clear_dirty_log { + __u32 slot; + __u32 num_pages; + __u64 first_page; + union { + void __user *dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; @@ -975,6 +986,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 #ifdef KVM_CAP_IRQ_ROUTING @@ -1421,6 +1433,9 @@ struct kvm_enc_region { #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ +#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From 2bc39970e9327ceb06cb210f86ba35f81d00e350 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 10 Dec 2018 18:21:56 +0100 Subject: x86/kvm/hyper-v: Introduce KVM_GET_SUPPORTED_HV_CPUID With every new Hyper-V Enlightenment we implement we're forced to add a KVM_CAP_HYPERV_* 
capability. While this approach works it is fairly inconvenient: the majority of the enlightenments we do have corresponding CPUID feature bit(s) and userspace has to know this anyways to be able to expose the feature to the guest. Add KVM_GET_SUPPORTED_HV_CPUID ioctl (backed by KVM_CAP_HYPERV_CPUID, "one cap to rule them all!") returning all Hyper-V CPUID feature leaves. Using the existing KVM_GET_SUPPORTED_CPUID doesn't seem to be possible: Hyper-V CPUID feature leaves intersect with KVM's (e.g. 0x40000000, 0x40000001) and we would probably confuse userspace in case we decide to return these twice. KVM_CAP_HYPERV_CPUID's number is interim: we intend to drop KVM_CAP_HYPERV_STIMER_DIRECT and use its number instead. Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9fe35f1ac938..6d4ea4b6c922 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -987,6 +987,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 +#define KVM_CAP_HYPERV_CPUID 167 #ifdef KVM_CAP_IRQ_ROUTING @@ -1436,6 +1437,9 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) +/* Available with KVM_CAP_HYPERV_CPUID */ +#define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From 17abc9ec68b73ddeb262a507a62421016b9c54d5 Mon Sep 17 00:00:00 2001 From: Tomasz Duszynski Date: Fri, 14 Dec 2018 19:28:01 +0100 Subject: iio: add IIO_MASSCONCENTRATION channel type Measuring particulate matter in ug / m3 (micro-grams per cubic meter) is de facto 
standard. Existing air quality sensors usually follow this convention and are capable of returning measurements using this unit. IIO currently does not offer suitable channel type for this type of measurements hence this patch adds this. In addition, extra modifiers are introduced used for distinguishing between fine pm1, pm2p5 and coarse pm4, pm10 particle measurements, i.e. IIO_MOD_PM1, IIO_MOD_PM2P5 and IIO_MOD_PM4, IIO_MOD_PM10. pmX consists of particles with aerodynamic diameter less than or equal to X micrometers. Signed-off-by: Tomasz Duszynski Acked-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 92baabc103ac..c59adac24b1c 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -46,6 +46,7 @@ enum iio_chan_type { IIO_GRAVITY, IIO_POSITIONRELATIVE, IIO_PHASE, + IIO_MASSCONCENTRATION, }; enum iio_modifier { @@ -87,6 +88,10 @@ enum iio_modifier { IIO_MOD_VOC, IIO_MOD_LIGHT_UV, IIO_MOD_LIGHT_DUV, + IIO_MOD_PM1, + IIO_MOD_PM2P5, + IIO_MOD_PM4, + IIO_MOD_PM10, }; enum iio_event_type { -- cgit v1.2.3 From b170f7d48443d1ea3e4ffbf409025b5e5b1146fe Mon Sep 17 00:00:00 2001 From: Andreas Brauchli Date: Thu, 13 Dec 2018 15:43:22 +0100 Subject: iio: Add modifiers for ethanol and H2 gases Add ethanol and H2 gas modifiers: * IIO_MOD_ETHANOL * IIO_MOD_H2 Signed-off-by: Andreas Brauchli Acked-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index c59adac24b1c..fdd81affca4b 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -92,6 +92,8 @@ enum iio_modifier { IIO_MOD_PM2P5, IIO_MOD_PM4, IIO_MOD_PM10, + IIO_MOD_ETHANOL, + IIO_MOD_H2, }; enum 
iio_event_type { -- cgit v1.2.3 From df9b0e30d44c901ac27c0f38cd54511b3f130c6d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 15 Dec 2018 14:09:06 -0800 Subject: neighbor: Add protocol attribute Similar to routes and rules, add protocol attribute to neighbor entries for easier tracking of how each was created. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 998155444e0d..cd144e3099a3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -28,6 +28,7 @@ enum { NDA_MASTER, NDA_LINK_NETNSID, NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ __NDA_MAX }; -- cgit v1.2.3 From 20427e5db3f96fc054c6a6ad95606906b834deb1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 3 Dec 2018 20:56:47 +0100 Subject: mmc: document 'Reliable Write' bit in uapi header If we use it this way, people should know about it. Also, replace true/false with nonzero/zero because the flag is not strictly a bool anymore. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- include/uapi/linux/mmc/ioctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 45f369dc0a42..00c08120f3ba 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -5,7 +5,10 @@ #include struct mmc_ioc_cmd { - /* Implies direction of data. true = write, false = read */ + /* + * Direction of data: nonzero = write, zero = read. + * Bit 31 selects 'Reliable Write' for RPMB. + */ int write_flag; /* Application-specific command. 
true = precede with CMD55 */ -- cgit v1.2.3 From 7239ff4b2be8ec0c3160da7fdd1475785fdb4cb9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 30 Oct 2018 16:43:23 +0200 Subject: btrfs: Introduce support for FSID change without metadata rewrite This field is going to be used when the user wants to change the UUID of the filesystem without having to rewrite all metadata blocks. This field adds another level of indirection such that when the FSID is changed what really happens is the current UUID (the one with which the fs was created) is copied to the 'metadata_uuid' field in the superblock as well as a new incompat flag is set METADATA_UUID. When the kernel detects this flag is set it knows that the superblock in fact has 2 UUIDs: 1. Is the UUID which is user-visible, currently known as FSID. 2. Metadata UUID - this is the UUID which is stamped into all on-disk datastructures belonging to this file system. When the new incompat flag is present device scanning checks whether both fsid/metadata_uuid of the scanned device match any of the registered filesystems. When the flag is not set then both UUIDs are equal and only the FSID is retained on disk, metadata_uuid is set only in-memory during mount. Additionally a new metadata_uuid field is also added to the fs_info struct. It's initialised either with the FSID in case METADATA_UUID incompat flag is not set or with the metadata_uuid of the superblock otherwise. This commit introduces the new fields as well as the new incompat flag and switches all users of the fsid to the new logic. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba [ minor updates in comments ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5ca1d21fc4a7..e0763bc4158e 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -269,6 +269,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) #define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) #define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) +#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) struct btrfs_ioctl_feature_flags { __u64 compat_flags; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index aff1356c2bb8..e974f4bb5378 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -458,6 +458,7 @@ struct btrfs_free_space_header { #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) +#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) /* -- cgit v1.2.3 From 6c4fc209fcf9d27efbaa48368773e4d2bfbd59aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Dec 2018 00:49:47 +0100 Subject: bpf: remove useless version check for prog load Existing libraries and tracing frameworks work around this kernel version check by automatically deriving the kernel version from uname(3) or similar such that the user does not need to do it manually; these workarounds also make the version check useless at the same time. 
Moreover, most other BPF tracing types enabling bpf_probe_read()-like functionality have /not/ adapted this check, and in general these days it is well understood anyway that all the tracing programs are not stable with regards to future kernels as kernel internal data structures are subject to change from release to release. Back at last netconf we discussed [0] and agreed to remove this check from bpf_prog_load() and instead document it here in the uapi header that there is no such guarantee for stable API for these programs. [0] http://vger.kernel.org/netconf2018_files/DanielBorkmann_netconf2018.pdf Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e7d57e89f25f..1d324c2cbca2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. 
+ */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -343,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ -- cgit v1.2.3 From b61c41c28eb09ae1bb02479a8f65171c037124c6 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 13 Dec 2018 20:23:26 +0300 Subject: Move EM_XTENSA to uapi/linux/elf-em.h This should never have been defined in the arch tree to begin with, and now uapi/linux/audit.h header is going to use EM_XTENSA in order to define AUDIT_ARCH_XTENSA which is needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Cc: Max Filippov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Signed-off-by: Dmitry V. Levin Signed-off-by: Max Filippov --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 93722e60204c..d2fb964432f3 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -34,6 +34,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ +#define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ -- cgit v1.2.3 From 98c3115a4ec56f03056efd9295e0fcb4c5c57a85 Mon Sep 17 00:00:00 2001 From: "Dmitry V. 
Levin" Date: Tue, 20 Nov 2018 03:17:01 +0300 Subject: xtensa: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Signed-off-by: Dmitry V. Levin Acked-by: Max Filippov Signed-off-by: Max Filippov --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 818ae690ab79..9e67fd359d58 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -411,6 +411,7 @@ enum { #define AUDIT_ARCH_TILEGX32 (EM_TILEGX|__AUDIT_ARCH_LE) #define AUDIT_ARCH_TILEPRO (EM_TILEPRO|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_XTENSA (EM_XTENSA) #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 -- cgit v1.2.3 From 9d5f9f701b1891466fb3dbb1806ad97716f95cc3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:51 -0800 Subject: bpf: btf: fix struct/union/fwd types with kind_flag This patch fixed two issues with BTF. One is related to struct/union bitfield encoding and the other is related to forward type. Issue #1 and solution: ====================== Current btf encoding of bitfield follows what pahole generates. For each bitfield, pahole will duplicate the type chain and put the bitfield size at the final int or enum type. Since the BTF enum type cannot encode bit size, pahole works around the issue by generating an int type whenever the enum bit size is not 32.
For example, -bash-4.4$ cat t.c typedef int ___int; enum A { A1, A2, A3 }; struct t { int a[5]; ___int b:4; volatile enum A c:4; } g; -bash-4.4$ gcc -c -O2 -g t.c The current kernel supports the following BTF encoding: $ pahole -JV t.o [1] TYPEDEF ___int type_id=2 [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [3] ENUM A size=4 vlen=3 A1 val=0 A2 val=1 A3 val=2 [4] STRUCT t size=24 vlen=3 a type_id=5 bits_offset=0 b type_id=9 bits_offset=160 c type_id=11 bits_offset=164 [5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5 [6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none) [7] VOLATILE (anon) type_id=3 [8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none) [9] TYPEDEF ___int type_id=8 [10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED [11] VOLATILE (anon) type_id=10 Two issues are in the above: . by changing enum type to int, we lost the original type information and this will not be ideal later when we try to convert BTF to a header file. . the type duplication for bitfields will cause BTF bloat. Duplicated types cannot be deduplicated later if the bitfield size is different. To fix this issue, this patch implemented a compatible change for BTF struct type encoding: . the bit 31 of struct_type->info, previously reserved, now is used to indicate whether bitfield_size is encoded in btf_member or not. . if bit 31 of struct_type->info is set, btf_member->offset will encode like: bit 0 - 23: bit offset bit 24 - 31: bitfield size if bit 31 is not set, the old behavior is preserved: bit 0 - 31: bit offset So if the struct contains a bit field, the maximum bit offset will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum bitfield size will be 256 which is enough for today as maximum bitfield in compiler can be 128 where int128 type is supported. 
This kernel patch intends to support the new BTF encoding: $ pahole -JV t.o [1] TYPEDEF ___int type_id=2 [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [3] ENUM A size=4 vlen=3 A1 val=0 A2 val=1 A3 val=2 [4] STRUCT t kind_flag=1 size=24 vlen=3 a type_id=5 bitfield_size=0 bits_offset=0 b type_id=1 bitfield_size=4 bits_offset=160 c type_id=7 bitfield_size=4 bits_offset=164 [5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5 [6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none) [7] VOLATILE (anon) type_id=3 Issue #2 and solution: ====================== Current forward type in BTF does not specify whether the original type is struct or union. This will not work for type pretty print and BTF-to-header-file conversion as struct/union must be specified. $ cat tt.c struct t; union u; int foo(struct t *t, union u *u) { return 0; } $ gcc -c -g -O2 tt.c $ pahole -JV tt.o [1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [2] FWD t type_id=0 [3] PTR (anon) type_id=2 [4] FWD u type_id=0 [5] PTR (anon) type_id=4 To fix this issue, similar to issue #1, type->info bit 31 is used. If the bit is set, it is union type. Otherwise, it is a struct type. $ pahole -JV tt.o [1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [2] FWD t kind_flag=0 type_id=0 [3] PTR (anon) kind_flag=0 type_id=2 [4] FWD u kind_flag=1 type_id=0 [5] PTR (anon) kind_flag=0 type_id=4 Pahole/LLVM change: =================== The new kind_flag functionality has been implemented in pahole and llvm: https://github.com/yonghong-song/pahole/tree/bitfield https://github.com/yonghong-song/llvm/tree/bitfield Note that pahole hasn't implemented func/func_proto kind and .BTF.ext. So to print function signature with bpftool, the llvm compiler should be used. 
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)") Acked-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/uapi/linux/btf.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 14f66948fc95..7b7475ef2f17 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -34,7 +34,9 @@ struct btf_type { * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-31: unused + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. @@ -52,6 +54,7 @@ struct btf_type { #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) #define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_INT 1 /* Integer */ @@ -110,9 +113,22 @@ struct btf_array { struct btf_member { __u32 name_off; __u32 type; - __u32 offset; /* offset in bits */ + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; }; +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + /* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". * The exact number of btf_param is stored in the vlen (of the * info in "struct btf_type"). 
-- cgit v1.2.3 From 30db641ef4f68054db9b191b6c0200fb1a96d458 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 15 Dec 2018 11:03:23 +0200 Subject: cfg80211: clarify LCI/civic location documentation The older code and current userspace assumed that this data is the content of the Measurement Report element, starting with the Measurement Token. Clarify this in the documentation. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2b53c0e949c7..4625a8624ba2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5893,9 +5893,11 @@ enum nl80211_external_auth_action { * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element - * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10) + * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10), + * i.e. starting with the measurement token * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element - * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13) + * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13), + * i.e. starting with the measurement token * @__NL80211_FTM_RESP_ATTR_LAST: Internal * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute. 
*/ @@ -6295,9 +6297,15 @@ enum nl80211_peer_measurement_ftm_failure_reasons { * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note * that standard deviation is the square root of variance, optional) * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) - * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 8. * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer - * (binary, optional) + * (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 11. * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only * * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal -- cgit v1.2.3 From 30c63115e20b70f89b7cfb66b35e2a0ef4b0ef07 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 4 Dec 2018 17:46:52 +0530 Subject: nl80211: Add support to notify radar event info received from STA Currently radar detection and corresponding channel switch is handled at the AP device. STA ignores these detected radar events since the radar signal can be seen mostly by the AP as well. But in scenarios where a radar signal is seen only at STA, notifying this event to the AP which can trigger a channel switch can be useful. Stations can report such radar events autonomously through Spectrum management (Measurement Report) action frame to its AP. The userspace on processing the report can notify the kernel with the use of the added NL80211_CMD_NOTIFY_RADAR to indicate the detected event and in turn adding the reported channel to NOL.
Signed-off-by: Sriram R Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4625a8624ba2..31ae5c7f10e3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1060,6 +1060,11 @@ * the measurement completed, using the measurement cookie * (%NL80211_ATTR_COOKIE). * + * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was + * detected and reported by a neighboring device on the channel + * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes + * determining the width and type. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1278,6 +1283,8 @@ enum nl80211_commands { NL80211_CMD_PEER_MEASUREMENT_RESULT, NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + NL80211_CMD_NOTIFY_RADAR, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 3bdbd0228e7555ec745e08469b98e5a0966409d6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 16 Dec 2018 15:47:04 -0800 Subject: bpf: sockmap, metadata support for reporting size of msg This adds metadata to sk_msg_md for BPF programs to read the sk_msg size. When the SK_MSG program is running under an application that is using sendfile the data is not copied into sk_msg buffers by default. Rather the BPF program uses sk_msg_pull_data to read the bytes in. This avoids doing the costly memcopy instructions when they are not in fact needed. However, if we don't know the size of the sk_msg we have to guess if needed bytes are available by doing a pull request which may fail. By including the size of the sk_msg BPF programs can check the size before issuing sk_msg_pull_data requests. Additionally, the same applies for sendmsg calls when the application provides multiple iovs. 
Here the BPF program needs to pull in data to update data pointers but it's not clear where the data ends without a size parameter. In many cases "guessing" is not easy to do and results in multiple calls to pull and without bounded loops everything gets fairly tricky. Clean this up by including a u32 size field. Note, all writes into sk_msg_md are rejected already from sk_msg_is_valid_access so nothing additional is needed there. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d324c2cbca2..91c43884f295 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2665,6 +2665,7 @@ struct sk_msg_md { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ }; struct sk_reuseport_md { -- cgit v1.2.3 From 3ad20fe393b31025bebfc2d76964561f65df48aa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 14 Dec 2018 13:11:14 +0100 Subject: binder: implement binderfs As discussed at Linux Plumbers Conference 2018 in Vancouver [1] this is the implementation of binderfs. /* Abstract */ binderfs is a backwards-compatible filesystem for Android's binder ipc mechanism. Each ipc namespace will mount a new binderfs instance. Mounting binderfs multiple times at different locations in the same ipc namespace will not cause a new super block to be allocated and hence it will be the same filesystem instance. Each new binderfs mount will have its own set of binder devices only visible in the ipc namespace it has been mounted in. All devices in a new binderfs mount will follow the scheme binder%d and numbering will always start at 0.
/* Backwards compatibility */ Devices requested in the Kconfig via CONFIG_ANDROID_BINDER_DEVICES for the initial ipc namespace will work as before. They will be registered via misc_register() and appear in the devtmpfs mount. Specifically, the standard devices binder, hwbinder, and vndbinder will all appear in their standard locations in /dev. Mounting or unmounting the binderfs mount in the initial ipc namespace will have no effect on these devices, i.e. they will neither show up in the binderfs mount nor will they disappear when the binderfs mount is gone. /* binder-control */ Each new binderfs instance comes with a binder-control device. No other devices will be present at first. The binder-control device can be used to dynamically allocate binder devices. All requests operate on the binderfs mount the binder-control device resides in. Assuming a new instance of binderfs has been mounted at /dev/binderfs via mount -t binderfs binderfs /dev/binderfs, a request to create a new binder device can be made as illustrated in [2]. Binderfs devices can simply be removed via unlink(). /* Implementation details */ - dynamic major number allocation: When binderfs is registered as a new filesystem it will dynamically allocate a new major number. The allocated major number will be returned in struct binderfs_device when a new binder device is allocated. - global minor number tracking: Minor numbers are tracked in a global idr struct that is capped at BINDERFS_MAX_MINOR. The minor number tracker is protected by a global mutex. This is the only point of contention between binderfs mounts. - struct binderfs_info: Each binderfs super block has its own struct binderfs_info that tracks specific details about a binderfs instance: - ipc namespace - dentry of the binder-control device - root uid and root gid of the user namespace the binderfs instance was mounted in - mountable by user namespace root: binderfs can be mounted by user namespace root in a non-initial user namespace.
The devices will be owned by user namespace root. - binderfs binder devices without misc infrastructure: New binder devices associated with a binderfs mount do not use the full misc_register() infrastructure. The misc_register() infrastructure can only create new devices in the host's devtmpfs mount. binderfs does however only make devices appear under its own mountpoint and thus allocates new character device nodes from the inode of the root dentry of the super block. This will have the side-effect that binderfs specific device nodes do not appear in sysfs. This behavior is similar to devpts allocated pts devices and has no effect on the functionality of the ipc mechanism itself. [1]: https://goo.gl/JL2tfX [2]: program to allocate a new binderfs binder device: #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { int fd, ret, saved_errno; size_t len; struct binderfs_device device = { 0 }; if (argc < 2) exit(EXIT_FAILURE); len = strlen(argv[1]); if (len > BINDERFS_MAX_NAME) exit(EXIT_FAILURE); memcpy(device.name, argv[1], len); fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); if (fd < 0) { printf("%s - Failed to open binder-control device\n", strerror(errno)); exit(EXIT_FAILURE); } ret = ioctl(fd, BINDER_CTL_ADD, &device); saved_errno = errno; close(fd); errno = saved_errno; if (ret < 0) { printf("%s - Failed to allocate new binder device\n", strerror(errno)); exit(EXIT_FAILURE); } printf("Allocated new binder device with major %d, minor %d, and " "name %s\n", device.major, device.minor, device.name); exit(EXIT_SUCCESS); } Cc: Martijn Coenen Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder_ctl.h | 35 +++++++++++++++++++++++++++++++++ include/uapi/linux/magic.h | 1 + 2 files changed, 36 insertions(+) create mode 100644 include/uapi/linux/android/binder_ctl.h 
(limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binder_ctl.h new file mode 100644 index 000000000000..65b2efd1a0a5 --- /dev/null +++ b/include/uapi/linux/android/binder_ctl.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 Canonical Ltd. + * + */ + +#ifndef _UAPI_LINUX_BINDER_CTL_H +#define _UAPI_LINUX_BINDER_CTL_H + +#include +#include +#include + +#define BINDERFS_MAX_NAME 255 + +/** + * struct binderfs_device - retrieve information about a new binder device + * @name: the name to use for the new binderfs binder device + * @major: major number allocated for binderfs binder devices + * @minor: minor number allocated for the new binderfs binder device + * + */ +struct binderfs_device { + char name[BINDERFS_MAX_NAME + 1]; + __u8 major; + __u8 minor; +}; + +/** + * Allocate a new binder device. + */ +#define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) + +#endif /* _UAPI_LINUX_BINDER_CTL_H */ + diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 96c24478d8ce..f8c00045d537 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -73,6 +73,7 @@ #define DAXFS_MAGIC 0x64646178 #define BINFMTFS_MAGIC 0x42494e4d #define DEVPTS_SUPER_MAGIC 0x1cd1 +#define BINDERFS_SUPER_MAGIC 0x6c6f6f70 #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define PIPEFS_MAGIC 0x50495045 #define PROC_SUPER_MAGIC 0x9fa0 -- cgit v1.2.3 From 1f23816b8eb8fdc39990abe166c10a18c16f6b21 Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 1 Nov 2018 15:40:35 -0700 Subject: virtio_blk: add discard and write zeroes support In commit 88c85538, "virtio-blk: add discard and write zeroes features to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio block specification has been extended to add VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES commands. 
This patch enables support for discard and write zeroes in the virtio-blk driver when the device advertises the corresponding features, VIRTIO_BLK_F_DISCARD and VIRTIO_BLK_F_WRITE_ZEROES. Signed-off-by: Changpeng Liu Signed-off-by: Daniel Verkamp Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 9ebe4d968dd5..0f99d7b49ede 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -38,6 +38,8 @@ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -86,6 +88,39 @@ struct virtio_blk_config { /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ __u16 num_queues; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ + /* + * The maximum discard sectors (in 512-byte sectors) for + * one segment. + */ + __u32 max_discard_sectors; + /* + * The maximum number of discard segments in a + * discard command. + */ + __u32 max_discard_seg; + /* Discard commands must be aligned to this number of sectors. */ + __u32 discard_sector_alignment; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ + /* + * The maximum number of write zeroes sectors (in 512-byte sectors) in + * one segment. + */ + __u32 max_write_zeroes_sectors; + /* + * The maximum number of segments in a write zeroes + * command. + */ + __u32 max_write_zeroes_seg; + /* + * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the + * deallocation of one or more of the sectors. 
+ */ + __u8 write_zeroes_may_unmap; + + __u8 unused1[3]; } __attribute__((packed)); /* @@ -114,6 +149,12 @@ struct virtio_blk_config { /* Get device ID command */ #define VIRTIO_BLK_T_GET_ID 8 +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeroes command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 @@ -133,6 +174,19 @@ struct virtio_blk_outhdr { __virtio64 sector; }; +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 + +/* Discard/write zeroes range for each request. */ +struct virtio_blk_discard_write_zeroes { + /* discard/write zeroes start sector */ + __le64 sector; + /* number of discard/write zeroes sectors */ + __le32 num_sectors; + /* flags for this range */ + __le32 flags; +}; + #ifndef VIRTIO_BLK_NO_LEGACY struct virtio_scsi_inhdr { __virtio32 errors; -- cgit v1.2.3 From 4b86713236e4bd6ea6c881a97711ae039fc4069b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 17 Dec 2018 18:35:09 +0100 Subject: vhost: split structs into a separate header file vhost structs are shared by vhost-kernel and vhost-user. Split them into a separate file to ease copying them into programs that implement either the server or the client side of vhost-user. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 113 +--------------------------------- include/uapi/linux/vhost_types.h | 128 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 111 deletions(-) create mode 100644 include/uapi/linux/vhost_types.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 84c3de89696a..40d028eed645 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -11,94 +11,9 @@ * device configuration. 
*/ +#include #include -#include #include -#include -#include - -struct vhost_vring_state { - unsigned int index; - unsigned int num; -}; - -struct vhost_vring_file { - unsigned int index; - int fd; /* Pass -1 to unbind from file. */ - -}; - -struct vhost_vring_addr { - unsigned int index; - /* Option flags. */ - unsigned int flags; - /* Flag values: */ - /* Whether log address is valid. If set enables logging. */ -#define VHOST_VRING_F_LOG 0 - - /* Start of array of descriptors (virtually contiguous) */ - __u64 desc_user_addr; - /* Used structure address. Must be 32 bit aligned */ - __u64 used_user_addr; - /* Available structure address. Must be 16 bit aligned */ - __u64 avail_user_addr; - /* Logging support. */ - /* Log writes to used structure, at offset calculated from specified - * address. Address must be 32 bit aligned. */ - __u64 log_guest_addr; -}; - -/* no alignment requirement */ -struct vhost_iotlb_msg { - __u64 iova; - __u64 size; - __u64 uaddr; -#define VHOST_ACCESS_RO 0x1 -#define VHOST_ACCESS_WO 0x2 -#define VHOST_ACCESS_RW 0x3 - __u8 perm; -#define VHOST_IOTLB_MISS 1 -#define VHOST_IOTLB_UPDATE 2 -#define VHOST_IOTLB_INVALIDATE 3 -#define VHOST_IOTLB_ACCESS_FAIL 4 - __u8 type; -}; - -#define VHOST_IOTLB_MSG 0x1 -#define VHOST_IOTLB_MSG_V2 0x2 - -struct vhost_msg { - int type; - union { - struct vhost_iotlb_msg iotlb; - __u8 padding[64]; - }; -}; - -struct vhost_msg_v2 { - __u32 type; - __u32 reserved; - union { - struct vhost_iotlb_msg iotlb; - __u8 padding[64]; - }; -}; - -struct vhost_memory_region { - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ - __u64 userspace_addr; - __u64 flags_padding; /* No flags are currently specified. */ -}; - -/* All region addresses and sizes must be 4K aligned. */ -#define VHOST_PAGE_SIZE 0x1000 - -struct vhost_memory { - __u32 nregions; - __u32 padding; - struct vhost_memory_region regions[0]; -}; /* ioctls */ @@ -186,31 +101,7 @@ struct vhost_memory { * device. This can be used to stop the ring (e.g. 
for migration). */ #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) -/* Feature bits */ -/* Log all write descriptors. Can be changed while device is active. */ -#define VHOST_F_LOG_ALL 26 -/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ -#define VHOST_NET_F_VIRTIO_NET_HDR 27 - -/* VHOST_SCSI specific definitions */ - -/* - * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. - * - * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + - * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage - * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target. - * All the targets under vhost_wwpn can be seen and used by guset. - */ - -#define VHOST_SCSI_ABI_VERSION 1 - -struct vhost_scsi_target { - int abi_version; - char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ - unsigned short vhost_tpgt; - unsigned short reserved; -}; +/* VHOST_SCSI specific defines */ #define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h new file mode 100644 index 000000000000..c907290ff065 --- /dev/null +++ b/include/uapi/linux/vhost_types.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_VHOST_TYPES_H +#define _LINUX_VHOST_TYPES_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. 
+ */ + +#include +#include +#include +#include + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +/* no alignment requirement */ +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 +#define VHOST_IOTLB_MSG_V2 0x2 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_msg_v2 { + __u32 type; + __u32 reserved; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* VHOST_SCSI specific definitions */ + +/* + * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. 
+ * + * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + + * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage + * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target. + * All the targets under vhost_wwpn can be seen and used by guset. + */ + +#define VHOST_SCSI_ABI_VERSION 1 + +struct vhost_scsi_target { + int abi_version; + char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ + unsigned short vhost_tpgt; + unsigned short reserved; +}; + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif -- cgit v1.2.3 From e262e32d6bde0f77fb0c95d977482fc872c51996 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:23 +0000 Subject: vfs: Suppress MS_* flag defs within the kernel unless explicitly enabled Only the mount namespace code that implements mount(2) should be using the MS_* flags. Suppress them inside the kernel unless uapi/linux/mount.h is included. Signed-off-by: David Howells Signed-off-by: Al Viro Reviewed-by: David Howells --- include/uapi/linux/fs.h | 56 ++++---------------------------------------- include/uapi/linux/mount.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 51 deletions(-) create mode 100644 include/uapi/linux/mount.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a441ea1bfe6d..53a22e8e0408 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -14,6 +14,11 @@ #include #include +/* Use of MS_* flags within the kernel is restricted to core mount(2) code. 
*/ +#if !defined(__KERNEL__) +#include +#endif + /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change * the file limit at runtime and only root can increase the per-process @@ -101,57 +106,6 @@ struct inodes_stat_t { #define NR_FILE 8192 /* this can well be larger on a larger system */ - -/* - * These are the fs-independent mount-flags: up to 32 flags are supported - */ -#define MS_RDONLY 1 /* Mount read-only */ -#define MS_NOSUID 2 /* Ignore suid and sgid bits */ -#define MS_NODEV 4 /* Disallow access to device special files */ -#define MS_NOEXEC 8 /* Disallow program execution */ -#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ -#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ -#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ -#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ -#define MS_NOATIME 1024 /* Do not update access times. */ -#define MS_NODIRATIME 2048 /* Do not update directory access times */ -#define MS_BIND 4096 -#define MS_MOVE 8192 -#define MS_REC 16384 -#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. - MS_VERBOSE is deprecated. */ -#define MS_SILENT 32768 -#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ -#define MS_UNBINDABLE (1<<17) /* change to unbindable */ -#define MS_PRIVATE (1<<18) /* change to private */ -#define MS_SLAVE (1<<19) /* change to slave */ -#define MS_SHARED (1<<20) /* change to shared */ -#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. 
*/ -#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ -#define MS_I_VERSION (1<<23) /* Update inode I_version field */ -#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ -#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ - -/* These sb flags are internal to the kernel */ -#define MS_SUBMOUNT (1<<26) -#define MS_NOREMOTELOCK (1<<27) -#define MS_NOSEC (1<<28) -#define MS_BORN (1<<29) -#define MS_ACTIVE (1<<30) -#define MS_NOUSER (1<<31) - -/* - * Superblock flags that can be altered by MS_REMOUNT - */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ - MS_LAZYTIME) - -/* - * Old magic mount flag and mask - */ -#define MS_MGC_VAL 0xC0ED0000 -#define MS_MGC_MSK 0xffff0000 - /* * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. */ diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h new file mode 100644 index 000000000000..3f9ec42510b0 --- /dev/null +++ b/include/uapi/linux/mount.h @@ -0,0 +1,58 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOATIME 1024 /* Do not update access times. 
*/ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +#endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From b4a1ed0cd18b771e4279b4eb9cf39b565560eea6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 20 Dec 2018 19:13:07 +0100 Subject: fbdev: make FB_BACKLIGHT a tristate BACKLIGHT_CLASS_DEVICE is already tristate, but a dependency FB_BACKLIGHT prevents it from being built as a module. There doesn't seem to be any particularly good reason for this, so switch FB_BACKLIGHT over to tristate. 
Signed-off-by: Rob Clark Tested-by: Arnd Bergmann Cc: Simon Horman Cc: Geert Uytterhoeven Cc: Laurent Pinchart Cc: Daniel Vetter Cc: Ulf Magnusson Cc: Randy Dunlap Cc: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz --- include/uapi/linux/fb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index 6cd9b198b7c6..b6aac7ee1f67 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -393,11 +393,9 @@ struct fb_cursor { struct fb_image image; /* Cursor image */ }; -#ifdef CONFIG_FB_BACKLIGHT /* Settings for the generic backlight code */ #define FB_BACKLIGHT_LEVELS 128 #define FB_BACKLIGHT_MAX 0xFF -#endif #endif /* _UAPI_LINUX_FB_H */ -- cgit v1.2.3 From 7f92891778dff62303c070ac81de7b7d80de331a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 20 Dec 2018 12:10:36 +1100 Subject: vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver POWER9 Witherspoon machines come with 4 or 6 V100 GPUs which are not pluggable PCIe devices but still have PCIe links which are used for config space and MMIO. In addition to that the GPUs have 6 NVLinks which are connected to other GPUs and the POWER9 CPU. POWER9 chips have a special unit on a die called an NPU which is an NVLink2 host bus adapter with p2p connections to 2 to 3 GPUs, 3 or 2 NVLinks to each. These systems also support ATS (address translation services) which is a part of the NVLink2 protocol. Such GPUs also share on-board RAM (16GB or 32GB) to the system via the same NVLink2 so a CPU has cache-coherent access to a GPU RAM. This exports GPU RAM to the userspace as a new VFIO device region. This preregisters the new memory as device memory as it might be used for DMA. 
This inserts pfns from the fault handler as the GPU memory is not onlined until the vendor driver is loaded and trained the NVLinks so doing this earlier causes low level errors which we fence in the firmware so it does not hurt the host system but still better be avoided; for the same reason this does not map GPU RAM into the host kernel (usual thing for emulated access otherwise). This exports an ATSD (Address Translation Shootdown) register of NPU which allows TLB invalidations inside GPU for an operating system. The register conveniently occupies a single 64k page. It is also presented to the userspace as a new VFIO device region. One NPU has 8 ATSD registers, each of them can be used for TLB invalidation in a GPU linked to this NPU. This allocates one ATSD register per an NVLink bridge allowing passing up to 6 registers. Due to the host firmware bug (just recently fixed), only 1 ATSD register per NPU was actually advertised to the host system so this passes that alone register via the first NVLink bridge device in the group which is still enough as QEMU collects them all back and presents to the guest via vPHB to mimic the emulated NPU PHB on the host. In order to provide the userspace with the information about GPU-to-NVLink connections, this exports an additional capability called "tgt" (which is an abbreviated host system bus address). The "tgt" property tells the GPU its own system address and allows the guest driver to conglomerate the routing information so each GPU knows how to get directly to the other GPUs. For ATS to work, the nest MMU (an NVIDIA block in a P9 CPU) needs to know LPID (a logical partition ID or a KVM guest hardware ID in other words) and PID (a memory context ID of a userspace process, not to be confused with a linux pid). This assigns a GPU to LPID in the NPU and this is why this adds a listener for KVM on an IOMMU group. A PID comes via NVLink from a GPU and NPU uses a PID wildcard to pass it through. 
This requires coherent memory and ATSD to be available on the host as the GPU vendor only supports configurations with both features enabled and other configurations are known not to work. Because of this and because of the ways the features are advertised to the host system (which is a device tree with very platform specific properties), this requires enabled POWERNV platform. The V100 GPUs do not advertise any of these capabilities via the config space and there are more than just one device ID so this relies on the platform to tell whether these GPUs have special abilities such as NVLinks. Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 813102810f53..02bb7ad6e986 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -353,6 +353,21 @@ struct vfio_region_gfx_edid { #define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 }; +/* + * 10de vendor sub-type + * + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. + */ +#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) + +/* + * 1014 vendor sub-type + * + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU + * to do TLB invalidation on a GPU. + */ +#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) + /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped * which allows direct access to non-MSIX registers which happened to be within @@ -363,6 +378,33 @@ struct vfio_region_gfx_edid { */ #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 +/* + * Capability with compressed real address (aka SSA - small system address) + * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing + * and by the userspace to associate a NVLink bridge with a GPU. 
+ */ +#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 + +struct vfio_region_info_cap_nvlink2_ssatgt { + struct vfio_info_cap_header header; + __u64 tgt; +}; + +/* + * Capability with an NVLink link speed. The value is read by + * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed" + * property in the device tree. The value is fixed in the hardware + * and failing to provide the correct value results in the link + * not working with no indication from the driver why. + */ +#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 + +struct vfio_region_info_cap_nvlink2_lnkspd { + struct vfio_info_cap_header header; + __u32 link_speed; + __u32 __pad; +}; + /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, * struct vfio_irq_info) -- cgit v1.2.3 From 077b930adafead095cd38600539ec129f1379d8c Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 13 Dec 2018 20:22:00 +0300 Subject: elf-em.h: add EM_CSKY The uapi/linux/audit.h header is going to use EM_CSKY in order to define AUDIT_ARCH_CSKY which is needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. The value for EM_CSKY has been taken from arch/csky/include/asm/elf.h and confirmed by binutils:include/elf/common.h Cc: Guo Ren Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Signed-off-by: Dmitry V. 
Levin Signed-off-by: Guo Ren --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 93722e60204c..d9544b8a7096 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -43,6 +43,7 @@ #define EM_TILEGX 191 /* Tilera TILE-Gx */ #define EM_RISCV 243 /* RISC-V */ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ +#define EM_CSKY 252 /* C-SKY */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ /* -- cgit v1.2.3 From d770b25653447b4c57303859e2ac04ebe9318f8e Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 13 Dec 2018 20:22:07 +0300 Subject: csky: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Cc: Guo Ren Cc: Paul Moore Cc: Eric Paris Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. 
Levin Signed-off-by: Guo Ren arch/csky/include/asm/syscall.h | 7 +++++++ include/uapi/linux/audit.h | 1 + 2 files changed, 8 insertions(+) --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 818ae690ab79..f91729232f46 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -378,6 +378,7 @@ enum { #define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARMEB (EM_ARM) #define AUDIT_ARCH_CRIS (EM_CRIS|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_CSKY (EM_CSKY|__AUDIT_ARCH_LE) #define AUDIT_ARCH_FRV (EM_FRV) #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) #define AUDIT_ARCH_IA64 (EM_IA_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -- cgit v1.2.3 From c10b13325ced237f6129e8ee73cd8c72e1bd10ed Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 18 Dec 2018 20:32:37 +0530 Subject: tty: serial: Add RDA8810PL UART driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add UART driver for RDA Micro RDA8810PL SoC. 
Signed-off-by: Andreas Färber Signed-off-by: Manivannan Sadhasivam Reviewed-by: Greg Kroah-Hartman Acked-by: Arnd Bergmann Signed-off-by: Olof Johansson --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index dce5f9dae121..df4a7534e239 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -281,4 +281,7 @@ /* MediaTek BTIF */ #define PORT_MTK_BTIF 117 +/* RDA UART */ +#define PORT_RDA 118 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From 7a79d717e0817610932ce3b7b6033ea06ee1d577 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 31 Dec 2018 23:59:59 +0100 Subject: batman-adv: Update copyright years for 2019 Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 2 +- include/uapi/linux/batman_adv.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 894d8d2f713d..7eb2936a8e22 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 324a0e1143e7..a28e76a7e0a2 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: MIT */ -/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2016-2019 B.A.T.M.A.N. 
contributors: * * Matthias Schiffer * -- cgit v1.2.3 From f5162216b7dab0c07e070b8b7f98891a85047f59 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 3 Jan 2019 15:27:43 -0800 Subject: autofs: add strictexpire mount option Commit 092a53452bb7 ("autofs: take more care to not update last_used on path walk") helped to (partially) resolve a problem where automounts were not expiring due to aggressive accesses from user space. This patch was later reverted because, for very large environments, it meant more mount requests from clients and when there are a lot of clients this caused a fairly significant increase in server load. But there is a need for both types of expire check, depending on use case, so add a mount option to allow for strict update of last use of autofs dentrys (which just means not updating the last use on path walk access). Link: http://lkml.kernel.org/r/154296973880.9889.14085372741514507967.stgit@pluto-themaw-net Signed-off-by: Ian Kent Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index df31aa9c9a8c..082119630b49 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 3 +#define AUTOFS_PROTO_SUBVERSION 4 /* * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed -- cgit v1.2.3 From 9da22854761a76c45d78aa2ae2b4bbd504b4f171 Mon Sep 17 00:00:00 2001 From: Carmeli Tamir Date: Thu, 3 Jan 2019 15:27:49 -0800 Subject: include/uapi/linux/msdos_fs.h: use MSDOS_NAME for volume label size The FAT file system volume label file stored in the root directory should match the volume label field in the FAT boot sector. As consequence, the max length of these fields ought to be the same. 
This patch replaces the magic '11' used in the struct fat_boot_sector with MSDOS_NAME, which is used in struct msdos_dir_entry. Please check the following references: 1. Microsoft FAT specification 2005 (http://read.pudn.com/downloads77/ebook/294884/FAT32%20Spec%20%28SDA%20Contribution%29.pdf). Search for 'volume label'. 2. Microsoft Extensible Firmware Initiative, FAT32 File System Specification (https://staff.washington.edu/dittrich/misc/fatgen103.pdf). Search for 'volume label'. 3. User space code that creates FAT filesystem sometimes uses MSDOS_NAME for the label, sometimes not. Search for 'if (memcmp(label, NO_NAME, MSDOS_NAME))'. I am considering making the same patch there as well. https://github.com/dosfstools/dosfstools/blob/master/src/mkfs.fat.c Link: http://lkml.kernel.org/r/1543096879-82837-1-git-send-email-carmeli.tamir@gmail.com Signed-off-by: Carmeli Tamir Reviewed-by: Sergey Senozhatsky Reviewed-by: Johannes Thumshirn Acked-by: OGAWA Hirofumi Cc: Jens Axboe Cc: Bart Van Assche Cc: Martin K. Petersen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msdos_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index fde753735aba..1216e6caf59b 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -135,7 +135,7 @@ struct fat_boot_sector { for mount state. */ __u8 signature; /* extended boot signature */ __u8 vol_id[4]; /* volume ID */ - __u8 vol_label[11]; /* volume label */ + __u8 vol_label[MSDOS_NAME]; /* volume label */ __u8 fs_type[8]; /* file system type */ /* other fields are not added here */ } fat16; @@ -158,7 +158,7 @@ struct fat_boot_sector { for mount state.
*/ __u8 signature; /* extended boot signature */ __u8 vol_id[4]; /* volume ID */ - __u8 vol_label[11]; /* volume label */ + __u8 vol_label[MSDOS_NAME]; /* volume label */ __u8 fs_type[8]; /* file system type */ /* other fields are not added here */ } fat32; -- cgit v1.2.3 From b553337a57cf4f077464292520f4e975ea4cda83 Mon Sep 17 00:00:00 2001 From: Carmeli Tamir Date: Thu, 3 Jan 2019 15:27:53 -0800 Subject: fat: remove FAT_FIRST_ENT macro The comment edited in this patch was the only reference to the FAT_FIRST_ENT macro, which is not used anymore. Moreover, the commented line of code does not compile with the current code. Since the FAT_FIRST_ENT macro checks the FAT variant in a way that the patch series changes, I removed it, and instead wrote a clear explanation of what was checked. I verified that the changed comment is correct according to Microsoft FAT spec, search for "BPB_Media" in the following references: 1. Microsoft FAT specification 2005 (http://read.pudn.com/downloads77/ebook/294884/FAT32%20Spec%20%28SDA%20Contribution%29.pdf). Search for 'volume label'. 2. Microsoft Extensible Firmware Initiative, FAT32 File System Specification (https://staff.washington.edu/dittrich/misc/fatgen103.pdf). Search for 'volume label'. Link: http://lkml.kernel.org/r/1544990640-11604-2-git-send-email-carmeli.tamir@gmail.com Signed-off-by: Carmeli Tamir Acked-by: OGAWA Hirofumi Reviewed-by: Sergey Senozhatsky Cc: Bart Van Assche Cc: Johannes Thumshirn Cc: Martin K. Petersen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msdos_fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index 1216e6caf59b..833c7079a1e3 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -58,9 +58,6 @@ #define MSDOS_DOT ". " /* ".", padded to MSDOS_NAME chars */ #define MSDOS_DOTDOT ".. 
" /* "..", padded to MSDOS_NAME chars */ -#define FAT_FIRST_ENT(s, x) ((MSDOS_SB(s)->fat_bits == 32 ? 0x0FFFFF00 : \ - MSDOS_SB(s)->fat_bits == 16 ? 0xFF00 : 0xF00) | (x)) - /* start of data cluster's entry (number of reserved clusters) */ #define FAT_START_ENT 2 -- cgit v1.2.3 From d19dc016187502dda6b8095e44eb46a18e89b2b3 Mon Sep 17 00:00:00 2001 From: Carmeli Tamir Date: Thu, 3 Jan 2019 15:27:56 -0800 Subject: fat: move MAX_FAT to fat.h and change it to inline function MAX_FAT is useless in msdos_fs.h, since it uses the MSDOS_SB function that is defined in fat.h. So really, this macro can be only called from code that already includes fat.h. Hence, this patch moves it to fat.h, right after MSDOS_SB is defined. I also changed it to an inline function in order to save the double call to MSDOS_SB. This was suggested by joe@perches.com in the previous version. This patch is required for the next in the series, in which the variant (whether this is FAT12, FAT16 or FAT32) checks are replaced with new macros. Link: http://lkml.kernel.org/r/1544990640-11604-3-git-send-email-carmeli.tamir@gmail.com Signed-off-by: Carmeli Tamir Acked-by: OGAWA Hirofumi Reviewed-by: Sergey Senozhatsky Cc: Bart Van Assche Cc: Johannes Thumshirn Cc: Martin K. Petersen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msdos_fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index 833c7079a1e3..a5773899f4d9 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -65,8 +65,6 @@ #define MAX_FAT12 0xFF4 #define MAX_FAT16 0xFFF4 #define MAX_FAT32 0x0FFFFFF6 -#define MAX_FAT(s) (MSDOS_SB(s)->fat_bits == 32 ? MAX_FAT32 : \ - MSDOS_SB(s)->fat_bits == 16 ? 
MAX_FAT16 : MAX_FAT12) /* bad cluster mark */ #define BAD_FAT12 0xFF7 -- cgit v1.2.3 From d1877155891020cb26ad4fba45bfee52d8da9951 Mon Sep 17 00:00:00 2001 From: Tigran Aivazian Date: Thu, 3 Jan 2019 15:28:14 -0800 Subject: bfs: extra sanity checking and static inode bitmap Strengthen validation of BFS superblock against corruption. Make in-core inode bitmap static part of superblock info structure. Print a warning when mounting a BFS filesystem created with "-N 512" option as only 510 files can be created in the root directory. Make the kernel messages more uniform. Update the 'prefix' passed to bfs_dump_imap() to match the current naming of operations. White space and comments cleanup. Link: http://lkml.kernel.org/r/CAK+_RLkFZMduoQF36wZFd3zLi-6ZutWKsydjeHFNdtRvZZEb4w@mail.gmail.com Signed-off-by: Tigran Aivazian Reported-by: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/bfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bfs_fs.h b/include/uapi/linux/bfs_fs.h index 940b04772af8..08f6b4956359 100644 --- a/include/uapi/linux/bfs_fs.h +++ b/include/uapi/linux/bfs_fs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * include/linux/bfs_fs.h - BFS data structures on disk. - * Copyright (C) 1999 Tigran Aivazian + * Copyright (C) 1999-2018 Tigran Aivazian */ #ifndef _LINUX_BFS_FS_H -- cgit v1.2.3 From 81c9d43f94870be66146739c6e61df40dc17bb64 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jan 2019 15:28:20 -0800 Subject: kernel/sysctl: add panic_print into sysctl So that we can also choose at runtime to print out the needed system info on panic, in addition to setting it via the kernel cmdline.
Link: http://lkml.kernel.org/r/1543398842-19295-3-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Suggested-by: Steven Rostedt Acked-by: Steven Rostedt (VMware) Cc: Thomas Gleixner Cc: John Stultz Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index d71013fffaf6..87aa2a6d9125 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -153,6 +153,7 @@ enum KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */ + KERN_PANIC_PRINT=78, /* ulong: bitmask to print system info on panic */ }; -- cgit v1.2.3 From d4ce5458ea1b7d8ca49c436d602095c4912777d3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 3 Jan 2019 10:10:37 +0900 Subject: arch: remove stale comments "UAPI Header export list" These comments are leftovers of commit fcc8487d477a ("uapi: export all headers under uapi directories"). Prior to that commit, exported headers must be explicitly added to header-y. Now, all headers under the uapi/ directories are exported. 
Signed-off-by: Masahiro Yamada --- include/uapi/linux/Kbuild | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index ca2787d9bf0f..5f24b50c9e88 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -1,5 +1,3 @@ -# UAPI Header export list - ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h),) no-export-headers += a.out.h endif -- cgit v1.2.3 From 8094c3ceb21ad93896fd4d238e8ba41911932eaf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Jan 2019 08:36:21 -0500 Subject: fscrypt: add Adiantum support Add support for the Adiantum encryption mode to fscrypt. Adiantum is a tweakable, length-preserving encryption mode with security provably reducible to that of XChaCha12 and AES-256, subject to a security bound. It's also a true wide-block mode, unlike XTS. See the paper "Adiantum: length-preserving encryption for entry-level processors" (https://eprint.iacr.org/2018/720.pdf) for more details. Also see commit 059c2a4d8e16 ("crypto: adiantum - add Adiantum support"). On sufficiently long messages, Adiantum's bottlenecks are XChaCha12 and the NH hash function. These algorithms are fast even on processors without dedicated crypto instructions. Adiantum makes it feasible to enable storage encryption on low-end mobile devices that lack AES instructions; currently such devices are unencrypted. On ARM Cortex-A7, on 4096-byte messages Adiantum encryption is about 4 times faster than AES-256-XTS encryption; decryption is about 5 times faster. In fscrypt, Adiantum is suitable for encrypting both file contents and names. With filenames, it fixes a known weakness: when two filenames in a directory share a common prefix of >= 16 bytes, with CTS-CBC their encrypted filenames share a common prefix too, leaking information. Adiantum does not have this problem. 
Since Adiantum also accepts long tweaks (IVs), it's also safe to use the master key directly for Adiantum encryption rather than deriving per-file keys, provided that the per-file nonce is included in the IVs and the master key isn't used for any other encryption mode. This configuration saves memory and improves performance. A new fscrypt policy flag is added to allow users to opt-in to this configuration. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/uapi/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a441ea1bfe6d..086e7ee550df 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -269,7 +269,8 @@ struct fsxattr { #define FS_POLICY_FLAGS_PAD_16 0x02 #define FS_POLICY_FLAGS_PAD_32 0x03 #define FS_POLICY_FLAGS_PAD_MASK 0x03 -#define FS_POLICY_FLAGS_VALID 0x03 +#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */ +#define FS_POLICY_FLAGS_VALID 0x07 /* Encryption algorithms */ #define FS_ENCRYPTION_MODE_INVALID 0 @@ -281,6 +282,7 @@ struct fsxattr { #define FS_ENCRYPTION_MODE_AES_128_CTS 6 #define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ #define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_ADIANTUM 9 struct fscrypt_policy { __u8 version; -- cgit v1.2.3 From efe75c494f57890900caf6c8a0667db35bfaf56a Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Mon, 29 Oct 2018 11:48:53 +0100 Subject: riscv: add audit support On RISC-V (riscv) audit is supported through generic lib/audit.c. The patch adds required arch specific definitions. 
Signed-off-by: David Abdurachmanov Signed-off-by: Palmer Dabbelt --- include/uapi/linux/audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 818ae690ab79..d0e037a96a7b 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -399,6 +399,8 @@ enum { /* do not define AUDIT_ARCH_PPCLE since it is not supported by audit */ #define AUDIT_ARCH_PPC64 (EM_PPC64|__AUDIT_ARCH_64BIT) #define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_RISCV32 (EM_RISCV|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_RISCV64 (EM_RISCV|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_S390 (EM_S390) #define AUDIT_ARCH_S390X (EM_S390|__AUDIT_ARCH_64BIT) #define AUDIT_ARCH_SH (EM_SH) -- cgit v1.2.3 From c2eb8effb265ac5cdd960d8e61ecb931e9c767cd Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Oct 2018 06:50:34 -0400 Subject: media: videodev2.h: add v4l2_timeval_to_ns inline function We want to be able to uniquely identify buffers for stateless codecs. The internal timestamp (a u64) as stored internally in the kernel is a suitable candidate for that, but in struct v4l2_buffer it is represented as a struct timeval. Add a v4l2_timeval_to_ns() function that converts the struct timeval into a u64 in the same way that the kernel does. This makes it possible to use this u64 elsewhere as a unique identifier of the buffer. Since timestamps are also copied from the output buffer to the corresponding capture buffer(s) by M2M devices, the u64 can be used to refer to both output and capture buffers. The plan is that in the future we redesign struct v4l2_buffer and use u64 for the timestamp instead of a struct timeval (which has lots of problems with 32 vs 64 bit and y2038 layout changes), and then there is no more need to use this function. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index b5671ce2724f..d6eed479c3a6 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -973,6 +973,18 @@ struct v4l2_buffer { }; }; +/** + * v4l2_timeval_to_ns - Convert timeval to nanoseconds + * @tv: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. + */ +static inline __u64 v4l2_timeval_to_ns(const struct timeval *tv) +{ + return (__u64)tv->tv_sec * 1000000000ULL + tv->tv_usec * 1000; +} + /* Flags for 'flags' field */ /* Buffer is mapped (flag) */ #define V4L2_BUF_FLAG_MAPPED 0x00000001 -- cgit v1.2.3 From b7ea4894aa867aaf1c31bfb4b00a3c3e38eedf95 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Mon, 7 Jan 2019 16:22:38 +0100 Subject: ptp: uapi: change _IOW to IOWR in PTP_SYS_OFFSET_EXTENDED definition The ioctl command is read/write (or just read, if the fact that user space writes n_samples field is ignored). Signed-off-by: Eugene Syromiatnikov Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index d73d83950265..1bc794ad957a 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -147,7 +147,7 @@ struct ptp_pin_desc { #define PTP_SYS_OFFSET_PRECISE \ _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) #define PTP_SYS_OFFSET_EXTENDED \ - _IOW(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) + _IOWR(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured.
*/ -- cgit v1.2.3 From c13295ad219d8bb0e47942d4cfc8251de449a67e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 11 Jan 2019 00:25:41 +0100 Subject: binderfs: rename header to binderfs.h It doesn't make sense to call the header binder_ctl.h when its sole existence is tied to binderfs. So give it a sensible name. Users will far more easily remember binderfs.h than binder_ctl.h. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder_ctl.h | 35 --------------------------------- include/uapi/linux/android/binderfs.h | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 35 deletions(-) delete mode 100644 include/uapi/linux/android/binder_ctl.h create mode 100644 include/uapi/linux/android/binderfs.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binder_ctl.h deleted file mode 100644 index 65b2efd1a0a5..000000000000 --- a/include/uapi/linux/android/binder_ctl.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2018 Canonical Ltd. - * - */ - -#ifndef _UAPI_LINUX_BINDER_CTL_H -#define _UAPI_LINUX_BINDER_CTL_H - -#include -#include -#include - -#define BINDERFS_MAX_NAME 255 - -/** - * struct binderfs_device - retrieve information about a new binder device - * @name: the name to use for the new binderfs binder device - * @major: major number allocated for binderfs binder devices - * @minor: minor number allocated for the new binderfs binder device - * - */ -struct binderfs_device { - char name[BINDERFS_MAX_NAME + 1]; - __u8 major; - __u8 minor; -}; - -/** - * Allocate a new binder device. 
- */ -#define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) - -#endif /* _UAPI_LINUX_BINDER_CTL_H */ - diff --git a/include/uapi/linux/android/binderfs.h b/include/uapi/linux/android/binderfs.h new file mode 100644 index 000000000000..65b2efd1a0a5 --- /dev/null +++ b/include/uapi/linux/android/binderfs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 Canonical Ltd. + * + */ + +#ifndef _UAPI_LINUX_BINDER_CTL_H +#define _UAPI_LINUX_BINDER_CTL_H + +#include +#include +#include + +#define BINDERFS_MAX_NAME 255 + +/** + * struct binderfs_device - retrieve information about a new binder device + * @name: the name to use for the new binderfs binder device + * @major: major number allocated for binderfs binder devices + * @minor: minor number allocated for the new binderfs binder device + * + */ +struct binderfs_device { + char name[BINDERFS_MAX_NAME + 1]; + __u8 major; + __u8 minor; +}; + +/** + * Allocate a new binder device. + */ +#define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) + +#endif /* _UAPI_LINUX_BINDER_CTL_H */ + -- cgit v1.2.3 From 2e746942ebacf1565caa72cf980745e5ce297c48 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 13 Jan 2019 22:28:05 -0800 Subject: Input: input_event - provide override for sparc64 The usec part of the timeval is defined as __kernel_suseconds_t tv_usec; /* microseconds */ Arnd noticed that sparc64 is the only architecture that defines __kernel_suseconds_t as int rather than long. This breaks the current y2038 fix for kernel as we only access and define the timeval struct for non-kernel use cases. But, this was hidden by another typo in the use of the __KERNEL__ qualifier. Fix the typo, and provide an override for sparc64.
Fixes: 152194fe9c3f ("Input: extend usable life of event timestamps to 2106 on 32 bit systems") Reported-by: Arnd Bergmann Signed-off-by: Deepa Dinamani Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 7288a7c573cc..551866a4f658 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -26,13 +26,17 @@ */ struct input_event { -#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL) +#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL__) struct timeval time; #define input_event_sec time.tv_sec #define input_event_usec time.tv_usec #else __kernel_ulong_t __sec; +#ifdef CONFIG_SPARC64 + unsigned int __usec; +#else __kernel_ulong_t __usec; +#endif #define input_event_sec __sec #define input_event_usec __usec #endif -- cgit v1.2.3 From f275ee0fa3a06eb87edc229749cf1eb18f0663fa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Jan 2019 21:24:13 +0100 Subject: IN_BADCLASS: fix macro to actually work Commit 65cab850f0ee ("net: Allow class-e address assignment via ifconfig ioctl") modified the IN_BADCLASS macro a bit, but unfortunately one too many '(' characters were added to the line, making any code that used it, not build properly. Also, the macro now compares an unsigned with a signed value, which isn't ok, so fix that up by making both types match properly. Reported-by: Christopher Ferris Fixes: 65cab850f0ee ("net: Allow class-e address assignment via ifconfig ioctl") Cc: Dave Taht Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S.
Miller --- include/uapi/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index f6052e70bf40..a55cb8b10165 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -268,7 +268,7 @@ struct sockaddr_in { #define IN_MULTICAST(a) IN_CLASSD(a) #define IN_MULTICAST_NET 0xe0000000 -#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) #define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) #define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) -- cgit v1.2.3 From 4b837c6d7ee771f68a30f362c9f68171a95be222 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 14 Jan 2019 09:01:54 -0500 Subject: media: v4l: uAPI: V4L2_BUF_TYPE_META_OUTPUT is an output buffer type V4L2_BUF_TYPE_META_OUTPUT was added by commit 72148d1a57e7 ("media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT") but the patch missed adding the type to the macro telling whether a given type is an output type or not. Do that now. Getting this wrong leads to handling the buffer as a capture buffer in a lot of places. 
Fixes: 72148d1a57e7 ("media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index d6eed479c3a6..c0c36c165bf4 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -161,7 +161,8 @@ enum v4l2_buf_type { || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY \ || (type) == V4L2_BUF_TYPE_VBI_OUTPUT \ || (type) == V4L2_BUF_TYPE_SLICED_VBI_OUTPUT \ - || (type) == V4L2_BUF_TYPE_SDR_OUTPUT) + || (type) == V4L2_BUF_TYPE_SDR_OUTPUT \ + || (type) == V4L2_BUF_TYPE_META_OUTPUT) enum v4l2_tuner_type { V4L2_TUNER_RADIO = 1, -- cgit v1.2.3 From 50656bad786d001b294764e9f047c5d5b3e4db75 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 10 Jan 2019 11:56:09 -0500 Subject: media: v4l2-ctrl: Add control to enable h.264 constrained intra prediction Allow to enable h.264 constrained intra prediction (macroblocks using intra prediction modes are not allowed to use residual data and decoded samples of neighboring macroblocks coded using inter prediction modes). This control directly corresponds to the constrained_intra_pred_flag field in the h.264 picture parameter set. 
Signed-off-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 3dcfc6148f99..fd65c710b144 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -533,6 +533,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type { }; #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER (V4L2_CID_MPEG_BASE+381) #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP (V4L2_CID_MPEG_BASE+382) +#define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION (V4L2_CID_MPEG_BASE+383) #define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP (V4L2_CID_MPEG_BASE+400) #define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP (V4L2_CID_MPEG_BASE+401) #define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP (V4L2_CID_MPEG_BASE+402) -- cgit v1.2.3 From d034696cbe5a6e00f76ca4b7869c6cdef66aebd5 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 10 Jan 2019 11:56:10 -0500 Subject: media: v4l2-ctrl: Add control for h.264 chroma qp offset Allow to add fixed quantization parameter offset between luma and chroma quantization parameters. This control directly corresponds to the chroma_qp_index_offset field of the h.264 picture parameter set. 
Signed-off-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index fd65c710b144..06479f2fb3ae 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -534,6 +534,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type { #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER (V4L2_CID_MPEG_BASE+381) #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP (V4L2_CID_MPEG_BASE+382) #define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION (V4L2_CID_MPEG_BASE+383) +#define V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET (V4L2_CID_MPEG_BASE+384) #define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP (V4L2_CID_MPEG_BASE+400) #define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP (V4L2_CID_MPEG_BASE+401) #define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP (V4L2_CID_MPEG_BASE+402) -- cgit v1.2.3 From 1c3721b1f22286033abeda30b7e12439b083ed0f Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 9 Jan 2019 13:30:04 -0500 Subject: media: videodev2.h: Add more field helper macros Adds two helper macros: V4L2_FIELD_IS_SEQUENTIAL: returns true if the given field type is 'sequential', that is a full frame is transmitted, or exists in memory, as all top field lines followed by all bottom field lines, or vice-versa. V4L2_FIELD_IS_INTERLACED: returns true if the given field type is 'interlaced', that is a full frame is transmitted, or exists in memory, as top field lines interlaced with bottom field lines. 
Signed-off-by: Steve Longerbeam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c0c36c165bf4..9a920f071ff9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -130,6 +130,13 @@ enum v4l2_field { ((field) == V4L2_FIELD_BOTTOM ||\ (field) == V4L2_FIELD_TOP ||\ (field) == V4L2_FIELD_ALTERNATE) +#define V4L2_FIELD_IS_INTERLACED(field) \ + ((field) == V4L2_FIELD_INTERLACED ||\ + (field) == V4L2_FIELD_INTERLACED_TB ||\ + (field) == V4L2_FIELD_INTERLACED_BT) +#define V4L2_FIELD_IS_SEQUENTIAL(field) \ + ((field) == V4L2_FIELD_SEQ_TB ||\ + (field) == V4L2_FIELD_SEQ_BT) enum v4l2_buf_type { V4L2_BUF_TYPE_VIDEO_CAPTURE = 1, -- cgit v1.2.3 From 75dd48e2e420a3cbbe56dd7adfcc6f142c948272 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 14 Jan 2019 18:41:35 +0100 Subject: netfilter: nf_tables: Support RULE_ID reference in new rule To allow for a batch to contain rules in arbitrary ordering, introduce NFTA_RULE_POSITION_ID attribute which works just like NFTA_RULE_POSITION but contains the ID of another rule within the same batch. This helps iptables-nft-restore handling dumps with mixed insert/append commands correctly. Note that NFTA_RULE_POSITION takes precedence over NFTA_RULE_POSITION_ID, so if the former is present, the latter is ignored. 
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7de4f1bdaf06..99ca95b830b6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -219,6 +219,7 @@ enum nft_chain_attributes { * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) + * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -231,6 +232,7 @@ enum nft_rule_attributes { NFTA_RULE_USERDATA, NFTA_RULE_PAD, NFTA_RULE_ID, + NFTA_RULE_POSITION_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) -- cgit v1.2.3 From 0fb4d21956f4a9af225594a46857ccf29bd747bc Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 16 Jan 2019 07:53:51 +0800 Subject: netfilter: nft_meta: Add NFT_META_I/OIFKIND meta type In the ip_rcv the skb goes through the PREROUTING hook first, then kicks in vrf device and go through the same hook again. When conntrack dnat works with vrf, there will be some conflict with rules because the packet goes through the hook twice with different nf status. 
ip link add user1 type vrf table 1 ip link add user2 type vrf table 2 ip l set dev tun1 master user1 ip l set dev tun2 master user2 nft add table firewall nft add chain firewall zones { type filter hook prerouting priority - 300 \; } nft add rule firewall zones counter ct zone set iif map { "tun1" : 1, "tun2" : 2 } nft add chain firewall rule-1000-ingress nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept nft add rule firewall rule-1000-ingress counter drop nft add chain firewall rule-1000-egress nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop nft add rule firewall rule-1000-egress counter accept nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; } nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress } nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress } nft add rule firewall dnat-all ct zone vmap { 1 : jump dnat-1000 } nft add rule firewall dnat-1000 ip daddr 2.2.2.11 counter dnat to 10.0.0.7 For a packet with ip daddr 2.2.2.11 and tcp dport 22, the first time it is accepted in rule-1000-ingress and DNATed to 10.0.0.7.
Then, the second time, the packet goes to the wrong chain rule-1000-egress, which leads to the packet being dropped. With this patch, userspace can add the 'don't re-do entire ruleset for vrf' policy itself via: nft add rule firewall rules-all meta iifkind "vrf" counter accept Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 99ca95b830b6..0ba8f48bdf0b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -791,6 +791,8 @@ enum nft_exthdr_attributes { * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) * @NFT_META_PRANDOM: a 32bit pseudo-random number * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp) + * @NFT_META_IIFKIND: packet input interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind) */ enum nft_meta_keys { NFT_META_LEN, @@ -819,6 +821,8 @@ enum nft_meta_keys { NFT_META_CGROUP, NFT_META_PRANDOM, NFT_META_SECPATH, + NFT_META_IIFKIND, + NFT_META_OIFKIND, }; /** -- cgit v1.2.3 From 0123a75e1d57c3df31e536868339c98c02c14917 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Fri, 18 Jan 2019 14:36:29 +0100 Subject: Revert "netfilter: nft_hash: add map lookups for hashing operations" A better way to implement this from userspace has been found without specific code in the kernel side, revert this.
Fixes: b9ccc07e3f31 ("netfilter: nft_hash: add map lookups for hashing operations") Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 0ba8f48bdf0b..030302893d96 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -877,8 +877,8 @@ enum nft_hash_attributes { NFTA_HASH_SEED, NFTA_HASH_OFFSET, NFTA_HASH_TYPE, - NFTA_HASH_SET_NAME, - NFTA_HASH_SET_ID, + NFTA_HASH_SET_NAME, /* deprecated */ + NFTA_HASH_SET_ID, /* deprecated */ __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) -- cgit v1.2.3 From f88c19aab5f34835f1ba467c5b508ec4f782f07f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Jan 2019 12:44:25 -0800 Subject: net_sched: add hit counter for matchall Although matchall always matches packets, however, it still relies on a protocol match first. So it is still useful to have such a counter for matchall. Of course, unlike u32, every time we hit a matchall filter, it is always a success, so we don't have to distinguish them. Sample output: filter protocol 802.1Q pref 100 matchall chain 0 filter protocol 802.1Q pref 100 matchall chain 0 handle 0x1 not_in_hw (rule hit 10) action order 1: vlan pop continue index 1 ref 1 bind 1 installed 40 sec used 1 sec Action statistics: Sent 836 bytes 10 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Reported-by: Martin Olsson Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 95d0db2a8350..32a3416b51c3 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -527,11 +527,17 @@ enum { /* Match-all classifier */ +struct tc_matchall_pcnt { + __u64 rhit; +}; + enum { TCA_MATCHALL_UNSPEC, TCA_MATCHALL_CLASSID, TCA_MATCHALL_ACT, TCA_MATCHALL_FLAGS, + TCA_MATCHALL_PCNT, + TCA_MATCHALL_PAD, __TCA_MATCHALL_MAX, }; -- cgit v1.2.3 From cb5ccfbe73b389470e1dc11061bb185ef4bc9aec Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 17 Jan 2019 23:59:10 +0200 Subject: devlink: Add health buffer support Devlink health buffer is a mechanism to pass descriptors between drivers and devlink. The API allows the driver to add objects, object pair, value array (nested attributes), value and name. Driver can use this API to fill the buffers in a format which can be translated by the devlink to the netlink message. In order to fulfill it, an internal buffer descriptor is defined. This will hold the data and metadata per each attribute and be used to pass actual commands to the netlink. This mechanism will be later used in devlink health for dump and diagnose data stored by the drivers. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: David S.
Miller --- include/uapi/linux/devlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6e52d3660654..cff0e0cb5ac2 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -285,6 +285,14 @@ enum devlink_attr { DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME, /* string */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY, /* nested */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */ + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* dynamic */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From ff253fedab961b22117a73ab808fcfa9e6852b50 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 17 Jan 2019 23:59:13 +0200 Subject: devlink: Add health get command Add devlink health get command to provide reporter/s data for user space. Add the ability to get data per reporter or dump data from all available reporters. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index cff0e0cb5ac2..c05470578b99 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -89,6 +89,8 @@ enum devlink_command { DEVLINK_CMD_REGION_DEL, DEVLINK_CMD_REGION_READ, + DEVLINK_CMD_HEALTH_REPORTER_GET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -293,6 +295,16 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */ DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* dynamic */ + DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ + DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ + DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ + DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, /* u8 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 6f9d56132eb6d2603d4273cfc65bed914ec47acb Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 17 Jan 2019 23:59:14 +0200 Subject: devlink: Add health set command Add devlink health set command, in order to set configuration parameters for a specific reporter. Supported parameters are: - graceful_period: Time interval between auto recoveries (in msec) - auto_recover: Determines if the devlink shall execute recover upon receiving error for the reporter Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index c05470578b99..49ad5a76b121 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -90,6 +90,7 @@ enum devlink_command { DEVLINK_CMD_REGION_READ, DEVLINK_CMD_HEALTH_REPORTER_GET, + DEVLINK_CMD_HEALTH_REPORTER_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From fcd852c69d776c0f46c8f79e8e431e5cc6ddc7b7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 17 Jan 2019 23:59:15 +0200 Subject: devlink: Add health recover command Add devlink health recover command to the uapi, in order to allow the user to execute a recover operation over a specific reporter. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 49ad5a76b121..1c186fd77343 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -91,6 +91,7 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_GET, DEVLINK_CMD_HEALTH_REPORTER_SET, + DEVLINK_CMD_HEALTH_REPORTER_RECOVER, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 8a66704a13d9713593342e29b4f0c19762f5746b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 17 Jan 2019 23:59:16 +0200 Subject: devlink: Add health diagnose command Add devlink health diagnose command, in order to run a diagnose operation over a specific reporter. It is expected from driver's callback for diagnose command to fill it via the buffer descriptors API. Devlink will parse it and convert it to netlink nla API in order to pass it to the user. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1c186fd77343..51b4d7612cf8 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -92,6 +92,7 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_GET, DEVLINK_CMD_HEALTH_REPORTER_SET, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 12bd0dcefe88782ac1c9fff632958dd1b71d27e5 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 17 Jan 2019 23:59:17 +0200 Subject: devlink: Add health dump {get,clear} commands Add devlink health dump commands, in order to run a dump operation over a specific reporter. The supported operations are dump_get in order to get last saved dump (if none exists, dump now) and dump_clear to clear last saved dump. It is expected from driver's callback for diagnose command to fill it via the buffer descriptors API. Devlink will parse it and convert it to netlink nla API in order to pass it to the user. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: David S.
Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 51b4d7612cf8..6b26bb2ce4dc 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -93,6 +93,8 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_SET, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 36647055b37ec78e9068f470f14e7cd75c001c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 18 Dec 2018 17:02:07 -0800 Subject: cfg80211: Add airtime statistics and settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds TX airtime statistics to the cfg80211 station dump (to go along with the RX info already present), and adds a new parameter to set the airtime weight of each station. The latter allows userspace to implement policies for different stations by varying their weights. Signed-off-by: Toke Høiland-Jørgensen [rmanohar@codeaurora.org: fixed checkpatch warnings] Signed-off-by: Rajkumar Manoharan [move airtime weight != 0 check into policy] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 31ae5c7f10e3..ebe79e12c82e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2299,6 +2299,9 @@ enum nl80211_commands { * This is also used for capability advertisement in the wiphy information, * with the appropriate sub-attributes. * + * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime + * scheduler. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2748,6 +2751,8 @@ enum nl80211_attrs { NL80211_ATTR_PEER_MEASUREMENTS, + NL80211_ATTR_AIRTIME_WEIGHT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3125,6 +3130,9 @@ enum nl80211_sta_bss_param { * might not be fully accurate. * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a * mesh gate (u8, 0 or 1) + * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames + * sent to the station (u64, usec) + * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3168,6 +3176,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_RX_MPDUS, NL80211_STA_INFO_FCS_ERROR_COUNT, NL80211_STA_INFO_CONNECTED_TO_GATE, + NL80211_STA_INFO_TX_DURATION, + NL80211_STA_INFO_AIRTIME_WEIGHT, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, @@ -5316,6 +5326,10 @@ enum nl80211_feature_flags { * if this flag is not set. Ignoring this can leak clear text packets and/or * freeze the connection. * + * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime + * fairness for transmitted packets and has enabled airtime fairness + * scheduling. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
*/ @@ -5355,6 +5369,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From cc24163690997c685641d84e77ff6f1c592b06fe Mon Sep 17 00:00:00 2001 From: Julan Hsu Date: Tue, 15 Jan 2019 15:28:42 -0800 Subject: nl80211/mac80211: mesh: add hop count to mpath info Expose hop count to destination information in mpath info Signed-off-by: Julan Hsu Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ebe79e12c82e..213a1d7c1063 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3287,6 +3287,7 @@ enum nl80211_mpath_flags { * &enum nl80211_mpath_flags; * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number * currently defind * @__NL80211_MPATH_INFO_AFTER_LAST: internal use @@ -3300,6 +3301,7 @@ enum nl80211_mpath_info { NL80211_MPATH_INFO_FLAGS, NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, NL80211_MPATH_INFO_DISCOVERY_RETRIES, + NL80211_MPATH_INFO_HOP_COUNT, /* keep last */ __NL80211_MPATH_INFO_AFTER_LAST, -- cgit v1.2.3 From 540bbcb930ed2fc9d6a57e0babea00027a7ecc67 Mon Sep 17 00:00:00 2001 From: Julan Hsu Date: Tue, 15 Jan 2019 15:28:43 -0800 Subject: nl80211/mac80211: mesh: add mesh path change count to mpath info Expose path change count to destination in mpath info Signed-off-by: Julan Hsu Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 
'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 213a1d7c1063..426db4d8f71c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3288,8 +3288,9 @@ enum nl80211_mpath_flags { * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination + * @NL80211_MPATH_INFO_PATH_CHANGE: total number of path changes to destination * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number - * currently defind + * currently defined * @__NL80211_MPATH_INFO_AFTER_LAST: internal use */ enum nl80211_mpath_info { @@ -3302,6 +3303,7 @@ enum nl80211_mpath_info { NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, NL80211_MPATH_INFO_DISCOVERY_RETRIES, NL80211_MPATH_INFO_HOP_COUNT, + NL80211_MPATH_INFO_PATH_CHANGE, /* keep last */ __NL80211_MPATH_INFO_AFTER_LAST, -- cgit v1.2.3 From 5954894ba3723995fbeab77bef62bb4878a654bb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Jan 2019 17:14:01 -0800 Subject: net_sched: add performance counters for basic filter Similar to u32 filter, it is useful to know how many times we reach each basic filter and how many times we pass the ematch attached to it. Sample output: filter protocol arp pref 49152 basic chain 0 filter protocol arp pref 49152 basic chain 0 handle 0x1 (rule hit 3 success 3) action order 1: gact action pass random type none pass val 0 index 1 ref 1 bind 1 installed 81 sec used 4 sec Action statistics: Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 32a3416b51c3..02ac251be8c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -333,12 +333,19 @@ enum { /* Basic filter */ +struct tc_basic_pcnt { + __u64 rcnt; + __u64 rhit; +}; + enum { TCA_BASIC_UNSPEC, TCA_BASIC_CLASSID, TCA_BASIC_EMATCHES, TCA_BASIC_ACT, TCA_BASIC_POLICE, + TCA_BASIC_PCNT, + TCA_BASIC_PAD, __TCA_BASIC_MAX }; -- cgit v1.2.3 From ad07c8ceb6631a83b62d405a61448bba92adac68 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 10 Jan 2019 13:53:34 +0000 Subject: perf/core: Remove unused perf_flags Now that perf_flags is not used we remove it. Signed-off-by: Andrew Murray Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Mark Rutland Cc: Matt Turner Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Richard Henderson Cc: Russell King Cc: Sascha Hauer Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: robin.murphy@arm.com Cc: suzuki.poulose@arm.com Link: https://lkml.kernel.org/r/1547128414-50693-13-git-send-email-andrew.murray@arm.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9de8780ac8d9..ea19b5d491bf 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -445,8 +445,6 @@ struct perf_event_query_bpf { __u32 ids[0]; }; -#define perf_flags(attr) (*(&(attr)->read_format + 1)) - /* * Ioctls that can be done on a perf event fd: */ -- cgit v1.2.3 From 76193a94522f1d4edf2447a536f3f796ce56343b Mon Sep 17 00:00:00 2001 From: Song Liu 
Date: Thu, 17 Jan 2019 08:15:13 -0800 Subject: perf, bpf: Introduce PERF_RECORD_KSYMBOL For better performance analysis of dynamically JITed and loaded kernel functions, such as BPF programs, this patch introduces PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol register/unregister information to user space. The following data structure is used for PERF_RECORD_KSYMBOL. /* * struct { * struct perf_event_header header; * u64 addr; * u32 len; * u16 ksym_type; * u16 flags; * char name[]; * struct sample_id sample_id; * }; */ Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index ea19b5d491bf..1dee5c8f166b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -372,7 +372,8 @@ struct perf_event_attr { context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ - __reserved_1 : 35; + ksymbol : 1, /* include ksymbol events */ + __reserved_1 : 34; union { __u32 wakeup_events; /* wakeup every n events */ @@ -963,9 +964,32 @@ enum perf_event_type { */ PERF_RECORD_NAMESPACES = 16, + /* + * Record ksymbol register/unregister events: + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u32 len; + * u16 ksym_type; + * u16 flags; + * char name[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_KSYMBOL = 17, + PERF_RECORD_MAX, /* non-ABI */ }; +enum perf_record_ksymbol_type { + 
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ +}; + +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 -- cgit v1.2.3 From 6ee52e2a3fe4ea35520720736e6791df1fb67106 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:15 -0800 Subject: perf, bpf: Introduce PERF_RECORD_BPF_EVENT For better performance analysis of BPF programs, this patch introduces PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program load/unload information to user space. Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs. The following example shows kernel symbols for a BPF program with 7 sub programs: ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi When a bpf program is loaded, PERF_RECORD_KSYMBOL is generated for each of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not needed for simple profiling. For annotation, user space need to listen to PERF_RECORD_BPF_EVENT and gather more information about these (sub) programs via sys_bpf. 
Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Acked-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Tested-by: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-4-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1dee5c8f166b..7198ddd0c6b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -373,7 +373,8 @@ struct perf_event_attr { write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - __reserved_1 : 34; + bpf_event : 1, /* include bpf events */ + __reserved_1 : 33; union { __u32 wakeup_events; /* wakeup every n events */ @@ -979,6 +980,25 @@ enum perf_event_type { */ PERF_RECORD_KSYMBOL = 17, + /* + * Record bpf events: + * enum perf_bpf_event_type { + * PERF_BPF_EVENT_UNKNOWN = 0, + * PERF_BPF_EVENT_PROG_LOAD = 1, + * PERF_BPF_EVENT_PROG_UNLOAD = 2, + * }; + * + * struct { + * struct perf_event_header header; + * u16 type; + * u16 flags; + * u32 id; + * u8 tag[BPF_TAG_SIZE]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_BPF_EVENT = 18, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -990,6 +1010,13 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) +enum perf_bpf_event_type { + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ +}; + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 -- cgit v1.2.3 From aefcb7460e0b5f35f72601b7a98eec5ca1639cf2 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 
15:18:56 +1100 Subject: m68k/mac: Fix PRAM accessors PMU-based m68k Macs pre-date PowerMac-style NVRAM. Use the appropriate PMU commands. Also implement the missing XPRAM accessors for VIA-based Macs. Acked-by: Geert Uytterhoeven Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pmu.h b/include/uapi/linux/pmu.h index 97256f90e6df..f2fc1bd80017 100644 --- a/include/uapi/linux/pmu.h +++ b/include/uapi/linux/pmu.h @@ -19,7 +19,9 @@ #define PMU_POWER_CTRL 0x11 /* control power of some devices */ #define PMU_ADB_CMD 0x20 /* send ADB packet */ #define PMU_ADB_POLL_OFF 0x21 /* disable ADB auto-poll */ +#define PMU_WRITE_XPRAM 0x32 /* write eXtended Parameter RAM */ #define PMU_WRITE_NVRAM 0x33 /* write non-volatile RAM */ +#define PMU_READ_XPRAM 0x3a /* read eXtended Parameter RAM */ #define PMU_READ_NVRAM 0x3b /* read non-volatile RAM */ #define PMU_SET_RTC 0x30 /* set real-time clock */ #define PMU_READ_RTC 0x38 /* read real-time clock */ -- cgit v1.2.3 From 6fc23b6ed8fa0ba6cc47b2f8756df1199abc3a5c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 12:01:19 +0100 Subject: binderfs: use correct include guards in header When we switched over from binder_ctl.h to binderfs.h we forgot to change the include guards. It's minor but it's obviously correct. 
Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binderfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binderfs.h b/include/uapi/linux/android/binderfs.h index 65b2efd1a0a5..b41628b77120 100644 --- a/include/uapi/linux/android/binderfs.h +++ b/include/uapi/linux/android/binderfs.h @@ -4,8 +4,8 @@ * */ -#ifndef _UAPI_LINUX_BINDER_CTL_H -#define _UAPI_LINUX_BINDER_CTL_H +#ifndef _UAPI_LINUX_BINDERFS_H +#define _UAPI_LINUX_BINDERFS_H #include #include @@ -31,5 +31,5 @@ struct binderfs_device { */ #define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) -#endif /* _UAPI_LINUX_BINDER_CTL_H */ +#endif /* _UAPI_LINUX_BINDERFS_H */ -- cgit v1.2.3 From 7d0174065f4903fb0ce0bab3d5047284faa7226d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 12:01:20 +0100 Subject: binderfs: use __u32 for device numbers We allow more than 255 binderfs binder devices to be created since there are workloads that require more than that. If we use __u8 we'll overflow after 255. So let's use a __u32. Note that there's no released kernel with binderfs out there so this is not a regression.
Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binderfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binderfs.h b/include/uapi/linux/android/binderfs.h index b41628b77120..87410477aea9 100644 --- a/include/uapi/linux/android/binderfs.h +++ b/include/uapi/linux/android/binderfs.h @@ -22,8 +22,8 @@ */ struct binderfs_device { char name[BINDERFS_MAX_NAME + 1]; - __u8 major; - __u8 minor; + __u32 major; + __u32 minor; }; /** -- cgit v1.2.3 From ec74136ded792deed80780a2f8baf3521eeb72f9 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 14 Jan 2019 09:10:21 -0800 Subject: binder: create node flag to request sender's security context To allow servers to verify client identity, allow a node flag to be set that causes the sender's security context to be delivered with the transaction. The BR_TRANSACTION command is extended in BR_TRANSACTION_SEC_CTX to contain a pointer to the security context string. 
Signed-off-by: Todd Kjos Reviewed-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index b9ba520f7e4b..2832134e5397 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -41,6 +41,14 @@ enum { enum { FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff, FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100, + + /** + * @FLAT_BINDER_FLAG_TXN_SECURITY_CTX: request security contexts + * + * Only when set, causes senders to include their security + * context + */ + FLAT_BINDER_FLAG_TXN_SECURITY_CTX = 0x1000, }; #ifdef BINDER_IPC_32BIT @@ -218,6 +226,7 @@ struct binder_node_info_for_ref { #define BINDER_VERSION _IOWR('b', 9, struct binder_version) #define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) #define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct binder_node_info_for_ref) +#define BINDER_SET_CONTEXT_MGR_EXT _IOW('b', 13, struct flat_binder_object) /* * NOTE: Two special error codes you should check for when calling @@ -276,6 +285,11 @@ struct binder_transaction_data { } data; }; +struct binder_transaction_data_secctx { + struct binder_transaction_data transaction_data; + binder_uintptr_t secctx; +}; + struct binder_transaction_data_sg { struct binder_transaction_data transaction_data; binder_size_t buffers_size; @@ -311,6 +325,11 @@ enum binder_driver_return_protocol { BR_OK = _IO('r', 1), /* No parameters! */ + BR_TRANSACTION_SEC_CTX = _IOR('r', 2, + struct binder_transaction_data_secctx), + /* + * binder_transaction_data_secctx: the received command. 
+ */ BR_TRANSACTION = _IOR('r', 2, struct binder_transaction_data), BR_REPLY = _IOR('r', 3, struct binder_transaction_data), /* -- cgit v1.2.3 From a258aeacd7f0dc10bb45caa7e92a3ea3ca1a76e9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 18 Jan 2019 14:30:23 +0200 Subject: bonding: add support for xstats and export 3ad stats This patch adds support for extended statistics (xstats) call to the bonding. The first user would be the 3ad code which counts the following events: - LACPDU Rx/Tx - LACPDU unknown type Rx - LACPDU illegal Rx - Marker Rx/Tx - Marker response Rx/Tx - Marker unknown type Rx All of these are exported via netlink as separate attributes to be easily extensible as we plan to add more in the future. Similar to how the bridge and other xstats exports, the structure inside is: [ IFLA_STATS_LINK_XSTATS ] -> [ LINK_XSTATS_TYPE_BOND ] -> [ BOND_XSTATS_3AD ] -> [ 3ad stats attributes ] With this structure it's easy to add more stat types later. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bonding.h | 24 ++++++++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index 61a1bf6e865e..790585f0e61b 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -117,6 +117,30 @@ struct ad_info { __u8 partner_system[ETH_ALEN]; }; +/* Embedded inside LINK_XSTATS_TYPE_BOND */ +enum { + BOND_XSTATS_UNSPEC, + BOND_XSTATS_3AD, + __BOND_XSTATS_MAX +}; +#define BOND_XSTATS_MAX (__BOND_XSTATS_MAX - 1) + +/* Embedded inside BOND_XSTATS_3AD */ +enum { + BOND_3AD_STAT_LACPDU_RX, + BOND_3AD_STAT_LACPDU_TX, + BOND_3AD_STAT_LACPDU_UNKNOWN_RX, + BOND_3AD_STAT_LACPDU_ILLEGAL_RX, + BOND_3AD_STAT_MARKER_RX, + BOND_3AD_STAT_MARKER_TX, + BOND_3AD_STAT_MARKER_RESP_RX, + BOND_3AD_STAT_MARKER_RESP_TX, + BOND_3AD_STAT_MARKER_UNKNOWN_RX, + BOND_3AD_STAT_PAD, + __BOND_3AD_STAT_MAX +}; +#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1) + #endif /* _LINUX_IF_BONDING_H */ /* diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d6533828123a..5b225ff63b48 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -925,6 +925,7 @@ enum { enum { LINK_XSTATS_TYPE_UNSPEC, LINK_XSTATS_TYPE_BRIDGE, + LINK_XSTATS_TYPE_BOND, __LINK_XSTATS_TYPE_MAX }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) -- cgit v1.2.3 From 4effd28c1245303dce7fd290c501ac2c11052114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 21 Jan 2019 07:26:27 +0100 Subject: bridge: join all-snoopers multicast address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next to snooping IGMP/MLD queries RFC4541, section 2.1.1.a) recommends to snoop multicast router advertisements to detect multicast routers. Multicast router advertisements are sent to an "all-snoopers" multicast address. 
To be able to receive them reliably, we need to join this group. Otherwise other snooping switches might refrain from forwarding these advertisements to us. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index a55cb8b10165..e7ad9d350a28 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -292,10 +292,11 @@ struct sockaddr_in { #define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000) /* Defines for Multicast INADDR */ -#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ -#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ -#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ -#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ +#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */ +#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ #endif /* contains the htonl type stuff.. */ -- cgit v1.2.3 From 4b3087c7e37f9e499127201849e33960dc81da11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 21 Jan 2019 07:26:28 +0100 Subject: bridge: Snoop Multicast Router Advertisements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple multicast routers are present in a broadcast domain then only one of them will be detectable via IGMP/MLD query snooping. The multicast router with the lowest IP address will become the selected and active querier while all other multicast routers will then refrain from sending queries. 
To detect such rather silent multicast routers, too, RFC4286 ("Multicast Router Discovery") provides a standardized protocol to detect multicast routers for multicast snooping switches. This patch implements the necessary MRD Advertisement message parsing and after successful processing adds such routers to the internal multicast router list. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/uapi/linux/icmpv6.h | 2 ++ include/uapi/linux/igmp.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index caf8dc019250..325395f56bfa 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -108,6 +108,8 @@ struct icmp6hdr { #define ICMPV6_MOBILE_PREFIX_SOL 146 #define ICMPV6_MOBILE_PREFIX_ADV 147 +#define ICMPV6_MRDISC_ADV 151 + /* * Codes for Destination Unreachable */ diff --git a/include/uapi/linux/igmp.h b/include/uapi/linux/igmp.h index 7e44ac02ca18..90c28bc466c6 100644 --- a/include/uapi/linux/igmp.h +++ b/include/uapi/linux/igmp.h @@ -93,6 +93,7 @@ struct igmpv3_query { #define IGMP_MTRACE_RESP 0x1e #define IGMP_MTRACE 0x1f +#define IGMP_MRDISC_ADV 0x30 /* From RFC4286 */ /* * Use the BSD names for these for compatibility -- cgit v1.2.3 From 141e5dcaa7356077028b4cd48ec351a38c70e5e5 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 24 Jan 2019 00:29:20 -0800 Subject: Input: input_event - fix the CONFIG_SPARC64 mixup Arnd Bergmann pointed out that CONFIG_* cannot be used in a uapi header. Override with an equivalent conditional. 
Fixes: 2e746942ebac ("Input: input_event - provide override for sparc64") Fixes: 152194fe9c3f ("Input: extend usable life of event timestamps to 2106 on 32 bit systems") Signed-off-by: Deepa Dinamani Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ffab958bc512..f056b2a00d5c 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -32,7 +32,7 @@ struct input_event { #define input_event_usec time.tv_usec #else __kernel_ulong_t __sec; -#ifdef CONFIG_SPARC64 +#if defined(__sparc__) && defined(__arch64__) unsigned int __usec; #else __kernel_ulong_t __usec; -- cgit v1.2.3 From d9ff286a0f59fa7843549e49bd240393dd7d8b87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Jan 2019 09:22:27 -0800 Subject: bpf: allow BPF programs access skb_shared_info->gso_segs field This adds the ability to read gso_segs from a BPF program. v3: Use BPF_REG_AX instead of BPF_REG_TMP for the temporary register, as suggested by Martin. v2: refined Eddie Hao patch to address Alexei feedback. Signed-off-by: Eric Dumazet Cc: Eddie Hao Cc: Martin KaFai Lau Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 91c43884f295..2940a9854f6d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2540,6 +2540,7 @@ struct __sk_buff { __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); __u64 tstamp; __u32 wire_len; + __u32 gso_segs; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 45383fb0f42db3945ac6cc658704706cdae19528 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 23 Jan 2019 17:50:26 +0800 Subject: virtio: support VIRTIO_F_ORDER_PLATFORM This patch introduces the support for VIRTIO_F_ORDER_PLATFORM. 
If this feature is negotiated, the driver must use the barriers suitable for hardware devices. Otherwise, the device and driver are assumed to be implemented in software, that is they can be assumed to run on identical CPUs in an SMP configuration. Thus a weaker form of memory barriers is sufficient to yield better performance. It is recommended that an add-in card based PCI device offers this feature for portability. The device will fail to operate further or will operate in a slower emulation mode if this feature is offered but not accepted. Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 1196e1c1d4f6..ff8e7dc9d4dd 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -78,6 +78,12 @@ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 +/* + * This feature indicates that memory accesses by the driver and the + * device are ordered in a way described by the platform. + */ +#define VIRTIO_F_ORDER_PLATFORM 36 + /* * Does the device support Single Root I/O Virtualization? */ -- cgit v1.2.3 From 745815f955f65f22d378d69822da11043d00aaff Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 24 Jan 2019 18:20:13 +0900 Subject: uapi: fix ioctl documentation The description of the BLKGETNRZONES zoned block device ioctl was not added as a comment together with this ioctl definition in commit 65e4e3eee83d7 ("block: Introduce BLKGETNRZONES ioctl"). Add its description here. 
Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 6fa38d001d84..498eec813494 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -138,6 +138,7 @@ struct blk_zone_range { * @BLKRESETZONE: Reset the write pointer of the zones in the specified * sector range. The sector range must be zone aligned. * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. + * @BLKGETNRZONES: Get the total number of zones of the device. */ #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) -- cgit v1.2.3 From a36b38aa2af61146ea80980a01cf6e952ab021c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 24 Jan 2019 19:59:39 +0100 Subject: xsk: add sock_diag interface for AF_XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the sock_diag interface for querying sockets from user space. Tools like iproute2 ss(8) can use this interface to list open AF_XDP sockets. The user-space ABI is defined in linux/xdp_diag.h and includes netlink request and response structs. The request can query sockets and the response contains socket information about the rings, umems, inode and more. 
Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/uapi/linux/xdp_diag.h | 72 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/uapi/linux/xdp_diag.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h new file mode 100644 index 000000000000..78b2591a7782 --- /dev/null +++ b/include/uapi/linux/xdp_diag.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * xdp_diag: interface for query/monitor XDP sockets + * Copyright(c) 2019 Intel Corporation. + */ + +#ifndef _LINUX_XDP_DIAG_H +#define _LINUX_XDP_DIAG_H + +#include + +struct xdp_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u16 pad; + __u32 xdiag_ino; + __u32 xdiag_show; + __u32 xdiag_cookie[2]; +}; + +struct xdp_diag_msg { + __u8 xdiag_family; + __u8 xdiag_type; + __u16 pad; + __u32 xdiag_ino; + __u32 xdiag_cookie[2]; +}; + +#define XDP_SHOW_INFO (1 << 0) /* Basic information */ +#define XDP_SHOW_RING_CFG (1 << 1) +#define XDP_SHOW_UMEM (1 << 2) +#define XDP_SHOW_MEMINFO (1 << 3) + +enum { + XDP_DIAG_NONE, + XDP_DIAG_INFO, + XDP_DIAG_UID, + XDP_DIAG_RX_RING, + XDP_DIAG_TX_RING, + XDP_DIAG_UMEM, + XDP_DIAG_UMEM_FILL_RING, + XDP_DIAG_UMEM_COMPLETION_RING, + XDP_DIAG_MEMINFO, + __XDP_DIAG_MAX, +}; + +#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1) + +struct xdp_diag_info { + __u32 ifindex; + __u32 queue_id; +}; + +struct xdp_diag_ring { + __u32 entries; /*num descs */ +}; + +#define XDP_DU_F_ZEROCOPY (1 << 0) + +struct xdp_diag_umem { + __u64 size; + __u32 id; + __u32 num_pages; + __u32 chunk_size; + __u32 headroom; + __u32 ifindex; + __u32 queue_id; + __u32 flags; + __u32 refs; +}; + +#endif /* _LINUX_XDP_DIAG_H */ -- cgit v1.2.3 From ab4dfa20534e32e48de6b761b42d943518fb26f7 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Wed, 19 Dec 2018 22:52:25 +0530 Subject: cfg80211: Allow drivers to advertise supported AKM suites There was no 
such capability advertisement from the driver and thus the current user space has to assume the driver to support all the AKMs. While that may be the case with some drivers (e.g., mac80211-based ones), there are cfg80211-based drivers that implement SME and have constraints on which AKMs can be supported (e.g., such drivers may need an update to support SAE AKM using NL80211_CMD_EXTERNAL_AUTH). Allow such drivers to advertise the exact set of supported AKMs so that user space tools can determine what network profile options should be allowed to be configured. Signed-off-by: Veerendranath Jakkam [pmsr data might be big, start a new netlink message section] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 426db4d8f71c..5f9d5cd458a1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1565,6 +1565,12 @@ enum nl80211_commands { * (a u32 with flags from &enum nl80211_wpa_versions). * @NL80211_ATTR_AKM_SUITES: Used with CONNECT, ASSOCIATE, and NEW_BEACON to * indicate which key management algorithm(s) to use (an array of u32). + * This attribute is also sent in response to @NL80211_CMD_GET_WIPHY, + * indicating the supported AKM suites, intended for specific drivers which + * implement SME and have constraints on which AKMs are supported and also + * the cases where an AKM support is offloaded to the driver/firmware. + * If there is no such notification from the driver, user space should + * assume the driver supports all the AKM suites. * * @NL80211_ATTR_REQ_IE: (Re)association request information elements as * sent out by the card, for ROAM and successful CONNECT events. -- cgit v1.2.3 From 30e5c2c6bf285d93dee4c45f23da95d7d50b125a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Jan 2019 10:53:23 -0800 Subject: net: Revert devlink health changes. 
This reverts the devlink health changes from 9/17/2019, Jiri wants things to be designed differently and it was agreed that the easiest way to do this is start from the beginning again. Commits reverted: cb5ccfbe73b389470e1dc11061bb185ef4bc9aec 880ee82f0313453ec5a6cb122866ac057263066b c7af343b4e33578b7de91786a3f639c8cfa0d97b ff253fedab961b22117a73ab808fcfa9e6852b50 6f9d56132eb6d2603d4273cfc65bed914ec47acb fcd852c69d776c0f46c8f79e8e431e5cc6ddc7b7 8a66704a13d9713593342e29b4f0c19762f5746b 12bd0dcefe88782ac1c9fff632958dd1b71d27e5 aba25279c10094c5c97d09c3491ca86d00b4ad5e ce019faa70f81555fa17ebc1d5a03651f2e7e15a b8c45a033acc607201588f7665ba84207e5149e0 And the follow-on build fix: o33a0efa4baecd689da9474ce0e8b673eb6931c60 Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6b26bb2ce4dc..6e52d3660654 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -89,13 +89,6 @@ enum devlink_command { DEVLINK_CMD_REGION_DEL, DEVLINK_CMD_REGION_READ, - DEVLINK_CMD_HEALTH_REPORTER_GET, - DEVLINK_CMD_HEALTH_REPORTER_SET, - DEVLINK_CMD_HEALTH_REPORTER_RECOVER, - DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, - DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, - /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -292,24 +285,6 @@ enum devlink_attr { DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME, /* string */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* 
dynamic */ - - DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ - DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ - DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ - DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, /* u8 */ - DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ - /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From fe4943702c850fa07f963eaa6f1530d9d4c2da78 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Wed, 23 Jan 2019 18:06:56 +0530 Subject: cfg80211: Authentication offload to user space in AP mode commit 40cbfa90218b ("cfg80211/nl80211: Optional authentication offload to userspace") introduced authentication offload to user space by the host drivers in station mode. This commit extends the same for the AP mode too. Extend NL80211_ATTR_EXTERNAL_AUTH_SUPPORT to also claim the support of external authentication from the user space in AP mode. A new flag parameter is introduced in cfg80211_ap_settings to intend the same while "start ap". Host driver to use NL80211_CMD_FRAME interface to transmit and receive the authentication frames to / from the user space. Host driver to indicate the flag NL80211_RXMGMT_FLAG_EXTERNAL_AUTH while sending the authentication frame to the user space. This intends to the user space that the driver wishes it to process the authentication frame for certain protocols, though it had initially advertised the support for SME functionality. User space shall accordingly do the authentication and indicate its final status through the command NL80211_CMD_EXTERNAL_AUTH. Allow the command even if userspace doesn't include the attribute NL80211_ATTR_SSID for AP interface. Host driver shall continue with the association sequence and indicate the STA connection status through cfg80211_new_sta.
To facilitate the host drivers in AP mode for matching the pmkid by the stations during the association, NL80211_CMD_EXTERNAL_AUTH is also enhanced to include the pmkid to drivers after the authentication. This pmkid can also be used in the STA mode to include in the association request. Also modify nl80211_external_auth to not mandate SSID in AP mode. Signed-off-by: Srinivas Dasari [remove useless nla_get_flag() usage] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5f9d5cd458a1..8b0fdb9e133b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2266,10 +2266,10 @@ enum nl80211_commands { * &enum nl80211_external_auth_action value). This is used with the * %NL80211_CMD_EXTERNAL_AUTH request event. * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user - * space supports external authentication. This attribute shall be used - * only with %NL80211_CMD_CONNECT request. The driver may offload - * authentication processing to user space if this capability is indicated - * in NL80211_CMD_CONNECT requests from the user space. + * space supports external authentication. This attribute shall be used + * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver + * may offload authentication processing to user space if this capability + * is indicated in the respective requests from the user space. * * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. @@ -5631,9 +5631,14 @@ enum nl80211_crit_proto_id { * Used by cfg80211_rx_mgmt() * * @NL80211_RXMGMT_FLAG_ANSWERED: frame was answered by device/driver. + * @NL80211_RXMGMT_FLAG_EXTERNAL_AUTH: Host driver intends to offload + * the authentication. 
Exclusively defined for host drivers that + * advertises the SME functionality but would like the userspace + * to handle certain authentication algorithms (e.g. SAE). */ enum nl80211_rxmgmt_flags { NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0, + NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = 1 << 1, }; /* -- cgit v1.2.3 From 6c900360e7c0df6a4846ac97d7b548d72cd801b0 Mon Sep 17 00:00:00 2001 From: Liangwei Dong Date: Fri, 18 Jan 2019 16:54:38 +0530 Subject: nl80211: Allow set/del pmksa operations for AP Host drivers may offload authentication to the user space through the commit ("cfg80211: Authentication offload to user space in AP mode"). This interface can be used to implement SAE by having the userspace do authentication/PMKID key derivation and driver handle the association. A step ahead, this interface can get further optimized if the PMKID is passed to the host driver and also have it respond to the association request by the STA on a valid PMKID. This commit enables the userspace to pass the PMKID to the host drivers through the set/del pmksa operations in AP mode. Set/Del pmksa is now restricted to STA/P2P client mode only and thus the drivers might not expect them in any other(AP) mode. This commit also introduces a feature flag NL80211_EXT_FEATURE_AP_PMKSA_CACHING (johannes: renamed) to maintain the backward compatibility of such an expectation by the host drivers. These operations are allowed in AP mode only when the drivers advertize the capability through this flag. 
Signed-off-by: Liangwei Dong Signed-off-by: Srinivas Dasari [rename flag to NL80211_EXT_FEATURE_AP_PMKSA_CACHING] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8b0fdb9e133b..dd4f86ee286e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5340,6 +5340,9 @@ enum nl80211_feature_flags { * fairness for transmitted packets and has enabled airtime fairness * scheduling. * + * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching + * (set/del PMKSA operations) in AP mode. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5380,6 +5383,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From d405c7407a5468d4fc11724d76063e0647d80106 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:25:59 -0500 Subject: bpf: allocate 0x06 to new eBPF instruction class JMP32 The new eBPF instruction class JMP32 uses the reserved class number 0x6. Kernel BPF ISA documentation updated accordingly. 
Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2940a9854f6d..60b99b730a41 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -14,6 +14,7 @@ /* Extended instruction set based on top of classic BPF */ /* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -- cgit v1.2.3 From 71368af9027f18fe5d1c6f372cfdff7e4bde8b48 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 16 Jan 2019 17:01:36 -0500 Subject: x86/speculation: Add PR_SPEC_DISABLE_NOEXEC With the default SPEC_STORE_BYPASS_SECCOMP/SPEC_STORE_BYPASS_PRCTL mode, the TIF_SSBD bit will be inherited when a new task is fork'ed or cloned. It will also remain when a new program is execve'ed. Only certain class of applications (like Java) that can run on behalf of multiple users on a single thread will require disabling speculative store bypass for security purposes. Those applications will call prctl(2) at startup time to disable SSB. They won't rely on the fact the SSB might have been disabled. Other applications that don't need SSBD will just move on without checking if SSBD has been turned on or not. The fact that the TIF_SSBD is inherited across execve(2) boundary will cause performance of applications that don't need SSBD but their predecessors have SSBD on to be unwittingly impacted especially if they write to memory a lot. To remedy this problem, a new PR_SPEC_DISABLE_NOEXEC argument for the PR_SET_SPECULATION_CTRL option of prctl(2) is added to allow applications to specify that the SSBD feature bit on the task structure should be cleared whenever a new program is being execve'ed. 
Suggested-by: Thomas Gleixner Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: David Woodhouse Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Tim Chen Cc: KarimAllah Ahmed Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Link: https://lkml.kernel.org/r/1547676096-3281-1-git-send-email-longman@redhat.com --- include/uapi/linux/prctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index b4875a93363a..094bb03b9cc2 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -219,6 +219,7 @@ struct prctl_mm_map { # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) /* Reset arm64 pointer authentication keys */ #define PR_PAC_RESET_KEYS 54 -- cgit v1.2.3 From 1194c4133195dfcb6c5fc0935d54bbed872a5285 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 29 Jan 2019 00:56:17 +0000 Subject: nfit: Add Hyper-V NVDIMM DSM command set to white list Add the Hyper-V _DSM command set to the white list of NVDIMM command sets. This command set is documented at http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901"). 
Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index f57c9e434d2d..de5d90212409 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -243,6 +243,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE1 1 #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 +#define NVDIMM_FAMILY_HYPERV 4 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From f4601dee25d5fe8010023552b10879f3d62e45ce Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 28 Jan 2019 18:00:21 +0530 Subject: devlink: Add port param get command Add port param get command which gets data per parameter. It also has option to dump the parameters data per port. v7->v8: Append "Acked-by: Jiri Pirko " Cc: Jiri Pirko Signed-off-by: Vasundhara Volam Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6e52d3660654..448973beac9d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -89,6 +89,8 @@ enum devlink_command { DEVLINK_CMD_REGION_DEL, DEVLINK_CMD_REGION_READ, + DEVLINK_CMD_PORT_PARAM_GET, /* can dump */ + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 -- cgit v1.2.3 From 9c54873b4e2ee22507627b1adac9e3a8407741bd Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 28 Jan 2019 18:00:22 +0530 Subject: devlink: Add port param set command Add port param set command to set the value for a parameter. Value can be set to any of the supported configuration modes. v7->v8: Append "Acked-by: Jiri Pirko " Cc: Jiri Pirko Signed-off-by: Vasundhara Volam Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 448973beac9d..3658fb20b190 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -90,6 +90,7 @@ enum devlink_command { DEVLINK_CMD_REGION_READ, DEVLINK_CMD_PORT_PARAM_GET, /* can dump */ + DEVLINK_CMD_PORT_PARAM_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From c1e5786d6771c67fe044c3bcaa23e631e0503261 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 28 Jan 2019 18:00:25 +0530 Subject: devlink: Add devlink notifications support for port params Add notification call for devlink port param set, register and unregister functions. Add devlink_port_param_value_changed() function to enable the driver to notify devlink on value change. Driver should use this function after value was changed on any configuration mode part to driverinit. v7->v8: Order devlink_port_param_value_changed() definitions followed by devlink_param_value_changed() Cc: Jiri Pirko Signed-off-by: Vasundhara Volam Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 3658fb20b190..61b4447a6c5b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -91,6 +91,8 @@ enum devlink_command { DEVLINK_CMD_PORT_PARAM_GET, /* can dump */ DEVLINK_CMD_PORT_PARAM_SET, + DEVLINK_CMD_PORT_PARAM_NEW, + DEVLINK_CMD_PORT_PARAM_DEL, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 2d908b38d40921a03225d42fd6e48eb51bffd606 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 23 Jan 2019 11:28:19 +0100 Subject: serial: Add Tegra Combined UART driver The Tegra Combined UART (TCU) is a mailbox-based mechanism that allows multiplexing multiple "virtual UARTs" into a single hardware serial port. The TCU is the primary serial port on Tegra194 devices. Add a TCU driver utilizing the mailbox framework, as the used mailboxes are part of Tegra HSP blocks that are already controlled by the Tegra HSP mailbox driver. Based on work by Mikko Perttunen . 
Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index df4a7534e239..6009ee2c2e99 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -79,6 +79,9 @@ /* Nuvoton UART */ #define PORT_NPCM 40 +/* NVIDIA Tegra Combined UART */ +#define PORT_TEGRA_TCU 41 + /* Intel EG20 */ #define PORT_PCH_8LINE 44 #define PORT_PCH_2LINE 45 -- cgit v1.2.3 From 80df2704a375bb4b3c9c5cce9c00052361b16d61 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 28 Jan 2019 15:08:23 +0800 Subject: sctp: introduce SCTP_FUTURE/CURRENT/ALL_ASSOC This patch is to add 3 constants SCTP_FUTURE_ASSOC, SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC for reserved assoc_ids, as defined in rfc6458#section-7.2. And add the process for them when doing lookup and inserting in sctp_id2assoc and sctp_assoc_set_id. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d584073532b8..b8f2c4d56532 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -59,6 +59,10 @@ typedef __s32 sctp_assoc_t; +#define SCTP_FUTURE_ASSOC 0 +#define SCTP_CURRENT_ASSOC 1 +#define SCTP_ALL_ASSOC 2 + /* The following symbols come from the Sockets API Extensions for * SCTP . */ -- cgit v1.2.3 From d0a060be573bfbf8753a15dca35497db5e968bb0 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 30 Jan 2019 12:02:44 +0000 Subject: arm64: add ptrace regsets for ptrauth key management Add two new ptrace regsets, which can be used to request and change the pointer authentication keys of a thread. NT_ARM_PACA_KEYS gives access to the instruction/data address keys, and NT_ARM_PACG_KEYS to the generic authentication key. 
The keys are also part of the core dump file of the process. The regsets are only exposed if the kernel is compiled with CONFIG_CHECKPOINT_RESTORE=y, as the only intended use case is checkpointing and restoring processes that are using pointer authentication. (This can be changed later if there are other use cases.) Reviewed-by: Dave Martin Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index e4d6ddd93567..34c02e4290fe 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -421,6 +421,8 @@ typedef struct elf64_shdr { #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ #define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From d83525ca62cf8ebe3271d14c36fb900c294274a2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:04 -0800 Subject: bpf: introduce bpf_spin_lock Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let bpf program serialize access to other variables. Example: struct hash_elem { int cnt; struct bpf_spin_lock lock; }; struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key); if (val) { bpf_spin_lock(&val->lock); val->cnt++; bpf_spin_unlock(&val->lock); } Restrictions and safety checks: - bpf_spin_lock is only allowed inside HASH and ARRAY maps. - BTF description of the map is mandatory for safety analysis. 
- bpf program can take one bpf_spin_lock at a time, since two or more can cause deadlocks. - only one 'struct bpf_spin_lock' is allowed per map element. It drastically simplifies implementation yet allows bpf program to use any number of bpf_spin_locks. - when bpf_spin_lock is taken the calls (either bpf2bpf or helpers) are not allowed. - bpf program must bpf_spin_unlock() before return. - bpf program can access 'struct bpf_spin_lock' only via bpf_spin_lock()/bpf_spin_unlock() helpers. - load/store into 'struct bpf_spin_lock lock;' field is not allowed. - to use bpf_spin_lock() helper the BTF description of map value must be a struct and have 'struct bpf_spin_lock anyname;' field at the top level. Nested lock inside another struct is not allowed. - syscall map_lookup doesn't copy bpf_spin_lock field to user space. - syscall map_update and program map_update do not update bpf_spin_lock field. - bpf_spin_lock cannot be on the stack or inside networking packet. bpf_spin_lock can only be inside HASH or ARRAY map value. - bpf_spin_lock is available to root only and to all program types. - bpf_spin_lock is not allowed in inner maps of map-in-map. - ld_abs is not allowed inside spin_lock-ed region. - tracing progs and socket filter progs cannot use bpf_spin_lock due to insufficient preemption checks Implementation details: - cgroup-bpf class of programs can nest with xdp/tc programs. Hence bpf_spin_lock is equivalent to spin_lock_irqsave. Other solutions to avoid nested bpf_spin_lock are possible. Like making sure that all networking progs run with softirq disabled. spin_lock_irqsave is the simplest and doesn't add overhead to the programs that don't use it. - arch_spinlock_t is used when it's implemented as queued_spin_lock - archs can force their own arch_spinlock_t - on architectures where queued_spin_lock is not available and sizeof(arch_spinlock_t) != sizeof(__u32) trivial lock is used.
- presence of bpf_spin_lock inside map value could have been indicated via extra flag during map_create, but specifying it via BTF is cleaner. It provides introspection for map key/value and reduces user mistakes. Next steps: - allow bpf_spin_lock in other map types (like cgroup local storage) - introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to request kernel to grab bpf_spin_lock before rewriting the value. That will serialize access to map elements. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 60b99b730a41..86f7c438d40f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2422,7 +2422,9 @@ union bpf_attr { FN(map_peek_elem), \ FN(msg_push_data), \ FN(msg_pop_data), \ - FN(rc_pointer_rel), + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3056,4 +3058,7 @@ struct bpf_line_info { __u32 line_col; }; +struct bpf_spin_lock { + __u32 val; +}; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 96049f3afd50fe8db69fa0068cdca822e747b1e4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:09 -0800 Subject: bpf: introduce BPF_F_LOCK flag Introduce BPF_F_LOCK flag for map_lookup and map_update syscall commands and for map_update() helper function. In all these cases take a lock of existing element (which was provided in BTF description) before copying (in or out) the rest of map value. Implementation details that are part of uapi: Array: The array map takes the element lock for lookup/update. Hash: hash map also takes the lock for lookup/update and tries to avoid the bucket lock. 
If old element exists it takes the element lock and updates the element in place. If element doesn't exist it allocates new one and inserts into hash table while holding the bucket lock. In rare case the hashmap has to take both the bucket lock and the element lock to update old value in place. Cgroup local storage: It is similar to array. update in place and lookup are done with lock taken. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 86f7c438d40f..1777fa0c61e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -267,6 +267,7 @@ enum bpf_attach_type { #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ /* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) -- cgit v1.2.3 From fb99bce7120014307dde57b3d7def6977a9a62a1 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 30 Jan 2019 21:58:05 +0000 Subject: net: tls: Support 256 bit keys Wire up support for 256 bit keys from the setsockopt to the crypto framework Signed-off-by: Dave Watson Signed-off-by: David S. 
Miller --- include/uapi/linux/tls.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index ff02287495ac..9affceaa3db4 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -59,6 +59,13 @@ #define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 +#define TLS_CIPHER_AES_GCM_256 52 +#define TLS_CIPHER_AES_GCM_256_IV_SIZE 8 +#define TLS_CIPHER_AES_GCM_256_KEY_SIZE 32 +#define TLS_CIPHER_AES_GCM_256_SALT_SIZE 4 +#define TLS_CIPHER_AES_GCM_256_TAG_SIZE 16 +#define TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD_TYPE 2 @@ -75,4 +82,12 @@ struct tls12_crypto_info_aes_gcm_128 { unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_aes_gcm_256 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_GCM_256_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_GCM_256_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_GCM_256_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE]; +}; + #endif /* _UAPI_LINUX_TLS_H */ -- cgit v1.2.3 From 130b392c6cd6b2aed1b7eb32253d4920babb4891 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 30 Jan 2019 21:58:31 +0000 Subject: net: tls: Add tls 1.3 support TLS 1.3 has minor changes from TLS 1.2 at the record layer. * Header now hardcodes the same version and application content type in the header. * The real content type is appended after the data, before encryption (or after decryption). * The IV is xored with the sequence number, instead of concatenating four bytes of IV with the explicit IV. * Zero-padding: No explicit length is given, we search backwards from the end of the decrypted data for the first non-zero byte, which is the content type. Currently recv supports reading zero-padding, but there is no way for send to add zero padding. Signed-off-by: Dave Watson Signed-off-by: David S.
Miller --- include/uapi/linux/tls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 9affceaa3db4..401d6f01de6a 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -51,6 +51,10 @@ #define TLS_1_2_VERSION_MINOR 0x3 #define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) +#define TLS_1_3_VERSION_MAJOR 0x3 +#define TLS_1_3_VERSION_MINOR 0x4 +#define TLS_1_3_VERSION TLS_VERSION_NUMBER(TLS_1_3) + /* Supported ciphers */ #define TLS_CIPHER_AES_GCM_128 51 #define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 -- cgit v1.2.3 From f9cf22882c606f3ffe06f620bb6d03b9eff18d3d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 31 Jan 2019 10:50:40 -0800 Subject: devlink: add device information API ethtool -i has served us well for a long time, but it's showing its limitations more and more. The device information should also be reported per device not per-netdev. Lay foundation for a simple devlink-based way of reading device info. Add driver name and device serial number as initial pieces of information exposed via this new API. v3: - rename helpers (Jiri); - rename driver name attr (Jiri); - remove double spacing in commit message (Jiri). RFC v2: - wrap the skb into an opaque structure (Jiri); - allow the serial number to be any length (Jiri & Andrew); - add driver name (Jonathan). Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/uapi/linux/devlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 61b4447a6c5b..142710d45093 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -94,6 +94,8 @@ enum devlink_command { DEVLINK_CMD_PORT_PARAM_NEW, DEVLINK_CMD_PORT_PARAM_DEL, + DEVLINK_CMD_INFO_GET, /* can dump */ + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -290,6 +292,9 @@ enum devlink_attr { DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ + DEVLINK_ATTR_INFO_DRIVER_NAME, /* string */ + DEVLINK_ATTR_INFO_SERIAL_NUMBER, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From fc6fae7dd987dccce3f322c32dc26b52d69ad00e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 31 Jan 2019 10:50:41 -0800 Subject: devlink: add version reporting to devlink info API ethtool -i has a few fixed-size fields which can be used to report firmware version and expansion ROM version. Unfortunately, modern hardware has more firmware components. There is usually some datapath microcode, management controller, PXE drivers, and a CPLD load. Running ethtool -i on modern controllers reveals the fact that vendors cram multiple values into firmware version field. Here are some examples from systems I could lay my hands on quickly: tg3: "FFV20.2.17 bc 5720-v1.39" i40e: "6.01 0x800034a4 1.1747.0" nfp: "0.0.3.5 0.25 sriov-2.1.16 nic" Add a new devlink API to allow retrieving multiple versions, and provide user-readable name for those versions. 
While at it break down the versions into three categories: - fixed - this is the board/fixed component version, usually vendors report information like the board version in the PCI VPD, but it will benefit from naming and common API as well; - running - this is the running firmware version; - stored - this is firmware in the flash, after firmware update this value will reflect the flashed version, while the running version may only be updated after reboot. v3: - add per-type helpers instead of using the special argument (Jiri). RFCv2: - remove the nesting in attr DEVLINK_ATTR_INFO_VERSIONS (now versions are mixed with other info attrs); - have the driver report versions from the same callback as other info. Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 142710d45093..7fffd879c328 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -294,6 +294,11 @@ enum devlink_attr { DEVLINK_ATTR_INFO_DRIVER_NAME, /* string */ DEVLINK_ATTR_INFO_SERIAL_NUMBER, /* string */ + DEVLINK_ATTR_INFO_VERSION_FIXED, /* nested */ + DEVLINK_ATTR_INFO_VERSION_RUNNING, /* nested */ + DEVLINK_ATTR_INFO_VERSION_STORED, /* nested */ + DEVLINK_ATTR_INFO_VERSION_NAME, /* string */ + DEVLINK_ATTR_INFO_VERSION_VALUE, /* string */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From bcb3fc3247e5a7ceb467ca0cfdaa4c1b830dd8f9 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:47 -0800 Subject: arch: sparc: Override struct __kernel_old_timeval struct __kernel_old_timeval is supposed to have the same layout as struct timeval. But, it was inadvertently missed that __kernel_suseconds has a different definition for sparc64. Provide an asm-specific override that fixes it.
Reported-by: Arnd Bergmann Suggested-by: Arnd Bergmann Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- include/uapi/linux/time.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 6b56a2208be7..04d5587f30d3 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -63,10 +63,12 @@ struct __kernel_itimerspec { * here, this is probably because it is not y2038 safe and needs to * be changed to use another interface. */ +#ifndef __kernel_old_timeval struct __kernel_old_timeval { __kernel_long_t tv_sec; __kernel_long_t tv_usec; }; +#endif /* * The IDs of the various system clocks (for POSIX.1b interval timers): -- cgit v1.2.3 From 98bb03c865d7ddf7a9a2eb80f9a0dd5f261c56ad Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:49 -0800 Subject: socket: Add struct __kernel_sock_timeval The new type is meant to be used as a y2038 safe structure to be used as part of cmsg data. Presently the SO_TIMESTAMP socket option uses struct timeval for timestamps. This is not y2038 safe. Subsequent patches in the series add new y2038 safe socket option to be used in the place of SO_TIMESTAMP_OLD. struct __kernel_sock_timeval will be used as the timestamp format at that time. struct __kernel_sock_timeval also maintains the same layout across 32 bit and 64 bit ABIs. Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/uapi/linux/time.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 04d5587f30d3..b8ad1b86b942 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -70,6 +70,11 @@ struct __kernel_old_timeval { }; #endif +struct __kernel_sock_timeval { + __s64 tv_sec; + __s64 tv_usec; +}; + /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ -- cgit v1.2.3 From 9718475e69084de15c3930ce35672a7dc6da866b Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:51 -0800 Subject: socket: Add SO_TIMESTAMPING_NEW Add SO_TIMESTAMPING_NEW variant of socket timestamp options. This is the y2038 safe version of the SO_TIMESTAMPING_OLD for all architectures. Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Cc: chris@zankel.net Cc: fenghua.yu@intel.com Cc: rth@twiddle.net Cc: tglx@linutronix.de Cc: ubraun@linux.ibm.com Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-s390@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- include/uapi/linux/errqueue.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index c0151200f7d1..d955b9e32288 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -41,6 +41,10 @@ struct scm_timestamping { struct timespec ts[3]; }; +struct scm_timestamping64 { + struct __kernel_timespec ts[3]; +}; + /* The type of scm_timestamping, passed in sock_extended_err ee_info. * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0] * is zero, then this is a hardware timestamp and recorded in ts[2].
-- cgit v1.2.3 From bff5731d43efbdf0bbd2d73cab32fe6435ea1046 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Feb 2019 17:56:28 -0800 Subject: net: devlink: report cell size of shared buffers Shared buffer allocation is usually done in cell increments. Drivers will either round up the allocation or refuse the configuration if it's not an exact multiple of cell size. Drivers know exactly the cell size of shared buffer, so help out users by providing this information in dumps. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7fffd879c328..054b2d1a4537 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -300,6 +300,8 @@ enum devlink_attr { DEVLINK_ATTR_INFO_VERSION_NAME, /* string */ DEVLINK_ATTR_INFO_VERSION_VALUE, /* string */ + DEVLINK_ATTR_SB_POOL_CELL_SIZE, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a46c52d9f2659498f0c0871f7f2333a692c243fe Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 29 Jan 2019 15:51:17 +0800 Subject: netfilter: nft_tunnel: Add NFTA_TUNNEL_MODE options nft "tunnel" expr match both the tun_info of RX and TX. This patch provide the NFTA_TUNNEL_MODE to individually match the tun_info of RX or TX. 
Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 030302893d96..a66c8de006cc 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1727,10 +1727,19 @@ enum nft_tunnel_keys { }; #define NFT_TUNNEL_MAX (__NFT_TUNNEL_MAX - 1) +enum nft_tunnel_mode { + NFT_TUNNEL_MODE_NONE, + NFT_TUNNEL_MODE_RX, + NFT_TUNNEL_MODE_TX, + __NFT_TUNNEL_MODE_MAX +}; +#define NFT_TUNNEL_MODE_MAX (__NFT_TUNNEL_MODE_MAX - 1) + enum nft_tunnel_attributes { NFTA_TUNNEL_UNSPEC, NFTA_TUNNEL_KEY, NFTA_TUNNEL_DREG, + NFTA_TUNNEL_MODE, __NFTA_TUNNEL_MAX }; #define NFTA_TUNNEL_MAX (__NFTA_TUNNEL_MAX - 1) -- cgit v1.2.3 From 3eb450367d0823226515ee24712ed08eccb33eb9 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 23 Oct 2018 23:21:14 -0400 Subject: rds: add type of service(tos) infrastructure RDS Service type (TOS) is user-defined and needs to be configured via RDS IOCTL interface. It must be set before initiating any traffic and once set the TOS can not be changed. All out-going traffic from the socket will be associated with its TOS. 
Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun --- include/uapi/linux/rds.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 8b73cb603c5f..5d0f76c780e5 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -69,6 +69,12 @@ #define RDS_TRANS_COUNT 3 #define RDS_TRANS_NONE (~0) +/* IOCTLS commands for SOL_RDS */ +#define SIOCRDSSETTOS (SIOCPROTOPRIVATE) +#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1) + +typedef __u8 rds_tos_t; + /* * Control message types for SOL_RDS. * @@ -149,6 +155,7 @@ struct rds_info_connection { __be32 faddr; __u8 transport[TRANSNAMSIZ]; /* null term ascii */ __u8 flags; + __u8 tos; } __attribute__((packed)); struct rds6_info_connection { @@ -171,6 +178,7 @@ struct rds_info_message { __be16 lport; __be16 fport; __u8 flags; + __u8 tos; } __attribute__((packed)); struct rds6_info_message { @@ -214,6 +222,7 @@ struct rds_info_tcp_socket { __u32 last_sent_nxt; __u32 last_expected_una; __u32 last_seen_una; + __u8 tos; } __attribute__((packed)); struct rds6_info_tcp_socket { @@ -240,6 +249,7 @@ struct rds_info_rdma_connection { __u32 max_send_sge; __u32 rdma_mr_max; __u32 rdma_mr_size; + __u8 tos; }; struct rds6_info_rdma_connection { @@ -253,6 +263,7 @@ struct rds6_info_rdma_connection { __u32 max_send_sge; __u32 rdma_mr_max; __u32 rdma_mr_size; + __u8 tos; }; /* RDS message Receive Path Latency points */ -- cgit v1.2.3 From 9c0644ee4aa8792f1e60a2b014b4710faaddafeb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 1 Feb 2019 17:13:57 -0500 Subject: virtio: drop internal struct from UAPI There's no reason to expose struct vring_packed in UAPI - if we do we won't be able to change or drop it, and it's not part of any interface. Let's move it to virtio_ring.c Cc: Tiwei Bie Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/virtio_ring.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 2414f8af26b3..4c4e24c291a5 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -213,14 +213,4 @@ struct vring_packed_desc { __le16 flags; }; -struct vring_packed { - unsigned int num; - - struct vring_packed_desc *desc; - - struct vring_packed_desc_event *driver; - - struct vring_packed_desc_event *device; -}; - #endif /* _UAPI_LINUX_VIRTIO_RING_H */ -- cgit v1.2.3 From 2c620ff93d9fbd5d644760d4c21d389078ec1080 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:20 -0700 Subject: time: Add struct __kernel_timex struct timex uses struct timeval internally. struct timeval is not y2038 safe. Introduce a new UAPI type struct __kernel_timex that is y2038 safe. struct __kernel_timex uses a timeval type that is similar to struct __kernel_timespec which preserves the same structure size across 32 bit and 64 bit ABIs. struct __kernel_timex also restructures other members of the structure to make the structure the same on 64 bit and 32 bit architectures. Note that struct __kernel_timex is the same as struct timex on a 64 bit architecture. The above solution is similar to other new y2038 syscalls that are being introduced: both 32 bit and 64 bit ABIs have a common entry, and the compat entry supports the old 32 bit syscall interface. Alternatives considered were: 1. Add new time type to struct timex that makes use of padded bits. This time type could be based on the struct __kernel_timespec. modes will use a flag to notify which time structure should be used internally. This needs some application level changes on both 64 bit and 32 bit architectures. Although 64 bit machines could continue to use the older timeval structure without any changes. 2. 
Add a new u8 type to struct timex that makes use of padded bits. This can be used to save higher order tv_sec bits. modes will use a flag to notify presence of such a type. This will need some application level changes on 32 bit architectures. 3. Add a new compat_timex structure that differs in only the size of the time type; keep rest of struct timex the same. This requires extra syscalls to manage all 3 cases on 64 bit architectures. This will not need any application level changes but will add more complexity from kernel side. Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/uapi/linux/timex.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index 92685d826444..a1c6b73016a5 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -92,6 +92,47 @@ struct timex { int :32; int :32; int :32; }; +struct __kernel_timex_timeval { + __kernel_time64_t tv_sec; + long long tv_usec; +}; + +#ifndef __kernel_timex +struct __kernel_timex { + unsigned int modes; /* mode selector */ + int :32; /* pad */ + long long offset; /* time offset (usec) */ + long long freq; /* frequency offset (scaled ppm) */ + long long maxerror;/* maximum error (usec) */ + long long esterror;/* estimated error (usec) */ + int status; /* clock command/status */ + int :32; /* pad */ + long long constant;/* pll time constant */ + long long precision;/* clock precision (usec) (read only) */ + long long tolerance;/* clock frequency tolerance (ppm) + * (read only) + */ + struct __kernel_timex_timeval time; /* (read only, except for ADJ_SETOFFSET) */ + long long tick; /* (modified) usecs between clock ticks */ + + long long ppsfreq;/* pps frequency (scaled ppm) (ro) */ + long long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + int :32; /* pad */ + long long stabil; /* pps stability (scaled ppm) 
(ro) */ + long long jitcnt; /* jitter limit exceeded (ro) */ + long long calcnt; /* calibration intervals (ro) */ + long long errcnt; /* calibration errors (ro) */ + long long stbcnt; /* stability limit exceeded (ro) */ + + int tai; /* TAI offset (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; +}; +#endif + /* * Mode codes (timex.mode) */ -- cgit v1.2.3 From c70a772fda11570ebddecbce1543a3fda008db4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Jan 2019 00:00:34 +0100 Subject: y2038: remove struct definition redirects We now use 64-bit time_t on all architectures, so the __kernel_timex, __kernel_timeval and __kernel_timespec redirects can be removed after having served their purpose. This makes it all much less confusing, as the __kernel_* types now always refer to the same layout based on 64-bit time_t across all 32-bit and 64-bit architectures. Signed-off-by: Arnd Bergmann --- include/uapi/linux/time.h | 4 ---- include/uapi/linux/timex.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 6b56a2208be7..b03f8717c312 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -42,19 +42,15 @@ struct itimerval { struct timeval it_value; /* current value */ }; -#ifndef __kernel_timespec struct __kernel_timespec { __kernel_time64_t tv_sec; /* seconds */ long long tv_nsec; /* nanoseconds */ }; -#endif -#ifndef __kernel_itimerspec struct __kernel_itimerspec { struct __kernel_timespec it_interval; /* timer period */ struct __kernel_timespec it_value; /* timer expiration */ }; -#endif /* * legacy timeval structure, only embedded in structures that diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index a1c6b73016a5..9f517f9010bb 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -97,7 +97,6 @@ struct __kernel_timex_timeval { long long tv_usec; }; 
-#ifndef __kernel_timex struct __kernel_timex { unsigned int modes; /* mode selector */ int :32; /* pad */ @@ -131,7 +130,6 @@ struct __kernel_timex { int :32; int :32; int :32; int :32; int :32; int :32; int :32; }; -#endif /* * Mode codes (timex.mode) -- cgit v1.2.3 From e9e0c8903009477b630e37a8b6364b26a00720da Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:34 +0200 Subject: fanotify: encode file identifier for FAN_REPORT_FID When user requests the flag FAN_REPORT_FID in fanotify_init(), a unique file identifier of the event target object will be reported with the event. The file identifier includes the filesystem's fsid (i.e. from statfs(2)) and an NFS file handle of the file (i.e. from name_to_handle_at(2)). The file identifier makes holding the path reference and passing a file descriptor to user redundant, so those are disabled in a group with FAN_REPORT_FID. Encode fid and store it in event for a group with FAN_REPORT_FID. Up to 12 bytes of file handle on 32bit arch (16 bytes on 64bit arch) are stored inline in fanotify_event struct. Larger file handles are stored in an external allocated buffer. On failure to encode fid, we print a warning and queue the event without the fid information. [JK: Fold part of later patched into this one to use exportfs_encode_inode_fh() right away] Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 909c98fcace2..d07f3cbc2786 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -44,6 +44,7 @@ /* Flags to determine fanotify event format */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ /* Deprecated - do not use this in programs and do not add new flags here! 
*/ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ -- cgit v1.2.3 From 5e469c830fdb5a1ebaa69b375b87f583326fd296 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:35 +0200 Subject: fanotify: copy event fid info to user If group requested FAN_REPORT_FID and event has file identifier, copy that information to user reading the event after event metadata. fid information is formatted as struct fanotify_event_info_fid that includes a generic header struct fanotify_event_info_header, so that other info types could be defined in the future using the same header. metadata->event_len includes the length of the fid information. The fid information includes the filesystem's fsid (see statfs(2)) followed by an NFS file handle of the file that could be passed as an argument to open_by_handle_at(2). Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d07f3cbc2786..959ae2bdc7ca 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -107,6 +107,26 @@ struct fanotify_event_metadata { __s32 pid; }; +#define FAN_EVENT_INFO_TYPE_FID 1 + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* Unique file identifier info record */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). 
+ */ + unsigned char handle[0]; +}; + struct fanotify_response { __s32 fd; __u32 response; -- cgit v1.2.3 From 235328d1fa4251c6dcb32351219bb553a58838d2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:43 +0200 Subject: fanotify: add support for create/attrib/move/delete events Add support for events with data type FSNOTIFY_EVENT_INODE (e.g. create/attrib/move/delete) for inode and filesystem mark types. The "inode" events do not carry enough information (i.e. path) to report event->fd, so we do not allow setting a mask for those events unless group supports reporting fid. The "inode" events are not supported on a mount mark, because they do not carry enough information (i.e. path) to be filtered by mount point. The "dirent" events (create/move/delete) report the fid of the parent directory where events took place without specifying the filename of the child. In the future, fanotify may get support for reporting filename information for those events. Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 959ae2bdc7ca..b9effa6f8503 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -7,9 +7,16 @@ /* the following events that user-space can register for */ #define FAN_ACCESS 0x00000001 /* File was accessed */ #define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ #define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ #define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted 
*/ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ @@ -24,6 +31,7 @@ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ /* flags used for fanotify_init() */ #define FAN_CLOEXEC 0x00000001 -- cgit v1.2.3 From 1db64e8733f653814f041ffe1428524494ef6123 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:32 +0200 Subject: devlink: Add devlink formatted message (fmsg) API Devlink fmsg is a mechanism to pass descriptors between drivers and devlink, in json-like format. The API allows the driver to add nested attributes such as object, object pair and value array, in addition to attributes such as name and value. Driver can use this API to fill the fmsg context in a format which will be translated by the devlink to the netlink message later. There is no memory allocation in advance (other than the initial list head), and it dynamically allocates messages descriptors and add them to the list on the fly. When it needs to send the data using SKBs to the netlink layer, it fragments the data between different SKBs. In order to do this fragmentation, it uses virtual nests attributes, to avoid actual nesting use which cannot be divided between different SKBs. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 054b2d1a4537..076692209a9b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -302,6 +302,14 @@ enum devlink_attr { DEVLINK_ATTR_SB_POOL_CELL_SIZE, /* u32 */ + DEVLINK_ATTR_FMSG, /* nested */ + DEVLINK_ATTR_FMSG_OBJ_NEST_START, /* flag */ + DEVLINK_ATTR_FMSG_PAIR_NEST_START, /* flag */ + DEVLINK_ATTR_FMSG_ARR_NEST_START, /* flag */ + DEVLINK_ATTR_FMSG_NEST_END, /* flag */ + DEVLINK_ATTR_FMSG_OBJ_NAME, /* string */ + DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, /* u8 */ + DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA, /* dynamic */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 7afe335a8bede4e2839b0e0fa36ef629fe4a0206 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:35 +0200 Subject: devlink: Add health get command Add devlink health get command to provide reporter/s data for user space. Add the ability to get data per reporter or dump data from all available reporters. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 076692209a9b..d8f20d6ce139 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -96,6 +96,8 @@ enum devlink_command { DEVLINK_CMD_INFO_GET, /* can dump */ + DEVLINK_CMD_HEALTH_REPORTER_GET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -310,6 +312,15 @@ enum devlink_attr { DEVLINK_ATTR_FMSG_OBJ_NAME, /* string */ DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, /* u8 */ DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA, /* dynamic */ + + DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ + DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ + DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ + DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a1e55ec0a0c6969cb7e9d9080a84041bb7b2b6e6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:36 +0200 Subject: devlink: Add health set command Add devlink health set command, in order to set configuration parameters for a specific reporter. Supported parameters are: - graceful_period: Time interval between auto recoveries (in msec) - auto_recover: Determines if the devlink shall execute recover upon receiving error for the reporter Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index d8f20d6ce139..b03065a99884 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -97,6 +97,7 @@ enum devlink_command { DEVLINK_CMD_INFO_GET, /* can dump */ DEVLINK_CMD_HEALTH_REPORTER_GET, + DEVLINK_CMD_HEALTH_REPORTER_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 20a0943a5b237f7d59dc581e9e3637f5c87f1fde Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:37 +0200 Subject: devlink: Add health recover command Add devlink health recover command to the uapi, in order to allow the user to execute a recover operation over a specific reporter. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b03065a99884..a3a97e6edad8 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -98,6 +98,7 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_GET, DEVLINK_CMD_HEALTH_REPORTER_SET, + DEVLINK_CMD_HEALTH_REPORTER_RECOVER, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From fca42a2794e31379855c7d687055da43a6e05eef Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:38 +0200 Subject: devlink: Add health diagnose command Add devlink health diagnose command, in order to run a diagnose operation over a specific reporter. It is expected from driver's callback for diagnose command to fill it via the devlink fmsg API. Devlink will parse it and convert it to netlink nla API in order to pass it to the user. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index a3a97e6edad8..09be37137841 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -99,6 +99,7 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_GET, DEVLINK_CMD_HEALTH_REPORTER_SET, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 35455e23e6f3cffe20e2b948e57597a8dc240b1e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:39 +0200 Subject: devlink: Add health dump {get,clear} commands Add devlink health dump commands, in order to run a dump operation over a specific reporter. The supported operations are dump_get in order to get last saved dump (if not exist, dump now) and dump_clear to clear last saved dump. It is expected from driver's callback for diagnose command to fill it via the devlink fmsg API. Devlink will parse it and convert it to netlink nla API in order to pass it to the user. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 09be37137841..72d9f7c89190 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -100,6 +100,8 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_SET, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, /* add new commands above here */ __DEVLINK_CMD_MAX, -- cgit v1.2.3 From 998a8a8387ff5f65da456d1fc448dbb926fb5d78 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 7 Feb 2019 21:41:46 +0100 Subject: net: phy: let genphy_c45_read_link manage the devices to check Let genphy_c45_read_link manage the devices to check, this removes overhead from callers. Add C22EXT to the list of excluded devices because it doesn't implement the status register. According to the 802.3 clause 45 spec registers 29.0 - 29.4 are reserved. At the moment we have very few clause 45 PHY drivers, so we are lacking experience whether other drivers will have to exclude further devices, or may need to check PHY XS. If we should figure out that list of devices to check needs to be configurable, I think best will be to add a device list member to struct phy_driver. v2: - adjusted commit message - exclude also device C22EXT from link checking Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/uapi/linux/mdio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index d435b00d64ad..2e6e309f0847 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -123,6 +123,8 @@ #define MDIO_DEVS_TC MDIO_DEVS_PRESENT(MDIO_MMD_TC) #define MDIO_DEVS_AN MDIO_DEVS_PRESENT(MDIO_MMD_AN) #define MDIO_DEVS_C22EXT MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT) +#define MDIO_DEVS_VEND1 MDIO_DEVS_PRESENT(MDIO_MMD_VEND1) +#define MDIO_DEVS_VEND2 MDIO_DEVS_PRESENT(MDIO_MMD_VEND2) /* Control register 2. */ #define MDIO_PMA_CTRL2_TYPE 0x000f /* PMA/PMD type selection */ -- cgit v1.2.3 From 3b5e74e0afe3382f9354b657714ac40673b7c597 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 8 Feb 2019 19:25:22 +0100 Subject: net: phy: disregard "Clause 22 registers present" bit in get_phy_c45_devs_in_pkg Bit 0 in register 1.5 doesn't represent a device but is a flag that Clause 22 registers are present. Therefore disregard this bit when populating the device list. If code needs this information it should read register 1.5 directly instead of accessing the device list. Because this bit doesn't represent a device don't define a MDIO_MMD_XYZ constant, just define a MDIO_DEVS_XYZ constant for the flag in the device list bitmap. v2: - make masking of bit 0 more explicit - improve commit message Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 2e6e309f0847..0e012b168e4d 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -115,6 +115,7 @@ /* Device present registers. 
*/ #define MDIO_DEVS_PRESENT(devad) (1 << (devad)) +#define MDIO_DEVS_C22PRESENT MDIO_DEVS_PRESENT(0) #define MDIO_DEVS_PMAPMD MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD) #define MDIO_DEVS_WIS MDIO_DEVS_PRESENT(MDIO_MMD_WIS) #define MDIO_DEVS_PCS MDIO_DEVS_PRESENT(MDIO_MMD_PCS) -- cgit v1.2.3 From 180cf62cec0418a64dade18b3575047af46c6335 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 9 Feb 2019 00:05:36 +0100 Subject: batman-adv: Fix typo "reseved" -> "reserved" checkpatch.pl complains since commit 45e417022023 ("scripts/spelling.txt: add more spellings to spelling.txt") about an additional spelling mistake in batman-adv:` CHECK: 'reseved' may be misspelled - perhaps 'reserved'? #232: FILE: include/uapi/linux/batadv_packet.h:232: + * @flags: reseved for routing relevant flags - currently always 0 Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 7eb2936a8e22..c99336f4eefe 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -229,7 +229,7 @@ struct batadv_ogm_packet { * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the general header * @ttl: time to live for this packet, part of the general header - * @flags: reseved for routing relevant flags - currently always 0 + * @flags: reserved for routing relevant flags - currently always 0 * @seqno: sequence number * @orig: originator mac address * @tvlv_len: length of the appended tvlv buffer (in bytes) -- cgit v1.2.3 From 60040513536097584c3d55b39acdfa7080645d80 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:14:56 +0100 Subject: batman-adv: Prepare framework for mesh genl config The batman-adv configuration interface was implemented solely using sysfs. 
This approach was condemned by non-batadv developers as "huge mistake". Instead a netlink/genl based implementation was suggested. The main objects for this configuration is the mesh/soft-interface object. Its actual object in memory already contains most of the available configuration settings. The genl interface reflects this by allowing to get/set it using the mesh specific commands. The BATADV_CMD_GET_MESH_INFO (or short version BATADV_CMD_GET_MESH) is reused as get command because it already provides the content of other information from the mesh/soft-interface which are not yet configuration specific. The set command BATADV_CMD_SET_MESH will also notify interested userspace listeners of the "config" mcast group using the BATADV_CMD_SET_MESH command message type that settings might have been changed and what the current values are. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a28e76a7e0a2..b3394aac9a88 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -27,6 +27,7 @@ #define BATADV_NL_NAME "batadv" +#define BATADV_NL_MCAST_GROUP_CONFIG "config" #define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" /** @@ -372,10 +373,14 @@ enum batadv_nl_commands { BATADV_CMD_UNSPEC, /** - * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv - * device + * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh */ - BATADV_CMD_GET_MESH_INFO, + BATADV_CMD_GET_MESH, + + /** + * @BATADV_CMD_GET_MESH_INFO: Alias for @BATADV_CMD_GET_MESH + */ + BATADV_CMD_GET_MESH_INFO = BATADV_CMD_GET_MESH, /** * @BATADV_CMD_TP_METER: Start a tp meter session @@ -443,6 +448,11 @@ enum batadv_nl_commands { */ BATADV_CMD_GET_MCAST_FLAGS, + /** + * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh + */ + 
BATADV_CMD_SET_MESH, + /* add new commands above here */ /** -- cgit v1.2.3 From 5c55a40fa801df2d807141319c0fdbb3939c3947 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:33:17 +0100 Subject: batman-adv: Prepare framework for hardif genl config The batman-adv configuration interface was implemented solely using sysfs. This approach was condemned by non-batadv developers as "huge mistake". Instead a netlink/genl based implementation was suggested. Beside the mesh/soft-interface specific configuration, the slave/hard-interface have B.A.T.M.A.N. V specific configuration settings. The genl interface reflects this by allowing to get/set it using the hard-interface specific commands. The BATADV_CMD_GET_HARDIFS (or short version BATADV_CMD_GET_HARDIF) is reused as get command because it already allows to dump the content of other information from the slave/hard-interface which are not yet configuration specific. The set command BATADV_CMD_SET_HARDIF will also notify interested userspace listeners of the "config" mcast group using the BATADV_CMD_SET_HARDIF command message type that settings might have been changed and what the current values are.
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index b3394aac9a88..62456a087ef6 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -398,9 +398,15 @@ enum batadv_nl_commands { BATADV_CMD_GET_ROUTING_ALGOS, /** - * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces + * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the + * current softif */ - BATADV_CMD_GET_HARDIFS, + BATADV_CMD_GET_HARDIF, + + /** + * @BATADV_CMD_GET_HARDIFS: Alias for @BATADV_CMD_GET_HARDIF + */ + BATADV_CMD_GET_HARDIFS = BATADV_CMD_GET_HARDIF, /** * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations @@ -453,6 +459,12 @@ enum batadv_nl_commands { */ BATADV_CMD_SET_MESH, + /** + * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the + * current softif + */ + BATADV_CMD_SET_HARDIF, + /* add new commands above here */ /** -- cgit v1.2.3 From 49e7e37cd98122126e8da58df2fe2261c6e83df2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:41:08 +0100 Subject: batman-adv: Prepare framework for vlan genl config The batman-adv configuration interface was implemented solely using sysfs. This approach was condemned by non-batadv developers as "huge mistake". Instead a netlink/genl based implementation was suggested. Beside the mesh/soft-interface specific configuration, the VLANs on top of the mesh/soft-interface have configuration settings. The genl interface reflects this by allowing to get/set it using the vlan specific commands BATADV_CMD_GET_VLAN/BATADV_CMD_SET_VLAN. 
The set command BATADV_CMD_SET_VLAN will also notify interested userspace listeners of the "config" mcast group using the BATADV_CMD_SET_VLAN command message type that settings might have been changed and what the current values are. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 62456a087ef6..10f3cc34fe16 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -345,6 +345,11 @@ enum batadv_nl_attrs { */ BATADV_ATTR_MCAST_FLAGS_PRIV, + /** + * @BATADV_ATTR_VLANID: VLAN id on top of soft interface + */ + BATADV_ATTR_VLANID, + /* add attributes above here, update the policy in netlink.c */ /** @@ -465,6 +470,18 @@ enum batadv_nl_commands { */ BATADV_CMD_SET_HARDIF, + /** + * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the + * current softif + */ + BATADV_CMD_GET_VLAN, + + /** + * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the + * current softif + */ + BATADV_CMD_SET_VLAN, + /* add new commands above here */ /** -- cgit v1.2.3 From 9ab4cee5ced970019a5d0f3a43cf85671ca7b38f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:46:14 +0100 Subject: batman-adv: Add aggregated_ogms mesh genl configuration The mesh interface can delay OGM messages to aggregate different ogms together in a single OGM packet. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_AGGREGATED_OGMS_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature.
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 10f3cc34fe16..f8941e80d6b4 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -350,6 +350,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_VLANID, + /** + * @BATADV_ATTR_AGGREGATED_OGMS_ENABLED: whether the batman protocol + * messages of the mesh interface shall be aggregated or not. + */ + BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From e43d16b87dc2cad18799cfd1142f4acae4135ea4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:51:55 +0100 Subject: batman-adv: Add ap_isolation mesh/vlan genl configuration The mesh interface can drop messages between clients to implement a mesh-wide AP isolation. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH and BATADV_CMD_SET_VLAN/BATADV_CMD_GET_VLAN commands allow to set/get the configuration of this feature using the BATADV_ATTR_AP_ISOLATION_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. This feature also requires that skbuff which should be handled as isolated are marked. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the mark/mask using the u32 attributes BATADV_ATTR_ISOLATION_MARK and BATADV_ATTR_ISOLATION_MASK. 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f8941e80d6b4..a4dadafe08dd 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -356,6 +356,25 @@ enum batadv_nl_attrs { */ BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + /** + * @BATADV_ATTR_AP_ISOLATION_ENABLED: whether the data traffic going + * from a wireless client to another wireless client will be silently + * dropped. + */ + BATADV_ATTR_AP_ISOLATION_ENABLED, + + /** + * @BATADV_ATTR_ISOLATION_MARK: the isolation mark which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MARK, + + /** + * @BATADV_ATTR_ISOLATION_MASK: the isolation (bit)mask which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MASK, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From d7e52506b680826d6ff7ce73e6a90a3b9defc741 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:55:44 +0100 Subject: batman-adv: Add bonding mesh genl configuration The mesh interface can use multiple slave/hard-interface ports at the same time to transport the traffic to other nodes. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_BONDING_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a4dadafe08dd..f74ff261ec8f 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -375,6 +375,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_ISOLATION_MASK, + /** + * @BATADV_ATTR_BONDING_ENABLED: whether the data traffic going through + * the mesh will be sent using multiple interfaces at the same time. + */ + BATADV_ATTR_BONDING_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From 43ff6105a527aaa1a8e00163b0b0aedbbc0c4522 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:03:39 +0100 Subject: batman-adv: Add bridge_loop_avoidance mesh genl configuration The mesh interface can try to detect loops in the same mesh caused by (indirectly) bridged mesh/soft-interfaces of different nodes. Some of the loops can also be resolved without breaking the mesh. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f74ff261ec8f..3cb35c661056 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -381,6 +381,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_BONDING_ENABLED, + /** + * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop + * avoidance feature is enabled. 
This feature detects and avoids loops + * between the mesh and devices bridged with the soft interface + */ + BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From a1c8de80329609ba68ff860074070efb1e14ade4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:06:42 +0100 Subject: batman-adv: Add distributed_arp_table mesh genl configuration The mesh interface can use a distributed hash table to answer ARP requests without flooding the request through the whole mesh. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 3cb35c661056..f303a1496476 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -388,6 +388,14 @@ enum batadv_nl_attrs { */ BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + /** + * @BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED: whether the distributed + * arp table feature is enabled. This feature uses a distributed hash + * table to answer ARP requests without flooding the request through + * the whole mesh. + */ + BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From 3e15b06eb7e410ef9f1b9673be094b3e10eacf93 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:09:49 +0100 Subject: batman-adv: Add fragmentation mesh genl configuration The mesh interface can fragment unicast packets when the packet size exceeds the outgoing slave/hard-interface MTU. 
The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_FRAGMENTATION_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f303a1496476..847841b8de5d 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -396,6 +396,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + /** + * @BATADV_ATTR_FRAGMENTATION_ENABLED: whether the data traffic going + * through the mesh will be fragmented or silently discarded if the + * packet size exceeds the outgoing interface MTU. + */ + BATADV_ATTR_FRAGMENTATION_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From e2d0d35b5b0ce420505e88255fd5922ed035bb8d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:15:00 +0100 Subject: batman-adv: Add gateway mesh genl configuration The mesh/soft-interface can optimize the handling of DHCP packets. Instead of flooding them through the whole mesh, it can be forwarded as unicast to a specific gateway server. The originator which injects the packets in the mesh has to select (based on sel_class thresholds) a responsible gateway server. This is done by switching this originator to the gw_mode client. The servers announce their forwarding bandwidth (download/upload) when the gw_mode server was selected. 
The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the attributes: * u8 BATADV_ATTR_GW_MODE (0 == off, 1 == client, 2 == server) * u32 BATADV_ATTR_GW_BANDWIDTH_DOWN (in 100 kbit/s steps) * u32 BATADV_ATTR_GW_BANDWIDTH_UP (in 100 kbit/s steps) * u32 BATADV_ATTR_GW_SEL_CLASS Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 847841b8de5d..165272be6878 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -139,6 +139,20 @@ enum batadv_mcast_flags_priv { BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING = (1 << 4), }; +/** + * enum batadv_gw_modes - gateway mode of node + */ +enum batadv_gw_modes { + /** @BATADV_GW_MODE_OFF: gw mode disabled */ + BATADV_GW_MODE_OFF, + + /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */ + BATADV_GW_MODE_CLIENT, + + /** @BATADV_GW_MODE_SERVER: announce itself as gatway server */ + BATADV_GW_MODE_SERVER, +}; + /** * enum batadv_nl_attrs - batman-adv netlink attributes */ @@ -403,6 +417,32 @@ enum batadv_nl_attrs { */ BATADV_ATTR_FRAGMENTATION_ENABLED, + /** + * @BATADV_ATTR_GW_BANDWIDTH_DOWN: defines the download bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_DOWN, + + /** + * @BATADV_ATTR_GW_BANDWIDTH_UP: defines the upload bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_UP, + + /** + * @BATADV_ATTR_GW_MODE: defines the state of the gateway features. 
+ * Possible values are specified in enum batadv_gw_modes + */ + BATADV_ATTR_GW_MODE, + + /** + * @BATADV_ATTR_GW_SEL_CLASS: defines the selection criteria this node + * will use to choose a gateway if gw_mode was set to 'client'. + */ + BATADV_ATTR_GW_SEL_CLASS, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From bfc7f1be57b8a5ea738ce5db62b82234e4901abf Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:19:38 +0100 Subject: batman-adv: Add hop_penalty mesh genl configuration The TQ (B.A.T.M.A.N. IV) and throughput values (B.A.T.M.A.N. V) are reduced when they are forwarded. One of the reductions is the penalty for traversing an additional hop. This hop_penalty (0-255) defines the percentage of reduction (0-100%). The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the u8 BATADV_ATTR_HOP_PENALTY attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 165272be6878..b37cb923332e 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -443,6 +443,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_GW_SEL_CLASS, + /** + * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied + * to an originator message's tq-field on every hop. + */ + BATADV_ATTR_HOP_PENALTY, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From b85bd091098a52f7bf00d2725b536455f82ba0d0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:22:33 +0100 Subject: batman-adv: Add log_level mesh genl configuration In contrast to other modules, batman-adv allows to set the debug message verbosity per mesh/soft-interface and not per module (via modparam). 
The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the u32 (bitmask) BATADV_ATTR_LOG_LEVEL attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index b37cb923332e..6d36e4b47eb4 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -449,6 +449,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_HOP_PENALTY, + /** + * @BATADV_ATTR_LOG_LEVEL: bitmask with to define which debug messages + * should be send to the debug log/trace ring buffer + */ + BATADV_ATTR_LOG_LEVEL, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From f75b56bc91122e2934e2cb458f98727c41d535c7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:25:05 +0100 Subject: batman-adv: Add multicast forceflood mesh genl configuration The mesh interface can optimize the flooding of multicast packets based on the content of the global translation tables. To disable this behavior and use the broadcast-like flooding of the packets, forceflood has to be enabled. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED attribute. Setting the u8 to zero will disable this feature (allowing multicast optimizations) and setting it to something else is enabling this feature (forcing simple flooding). 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 6d36e4b47eb4..38caaaae8a05 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -455,6 +455,15 @@ enum batadv_nl_attrs { */ BATADV_ATTR_LOG_LEVEL, + /** + * @BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED: whether multicast + * optimizations should be replaced by simple broadcast-like flooding + * of multicast packets. If set to non-zero then all nodes in the mesh + * are going to use classic flooding for any multicast packet with no + * optimizations. + */ + BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From 6c57cde6800bae2361b8ac14a5924ffc592b3a90 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:26:14 +0100 Subject: batman-adv: Add network_coding mesh genl configuration The mesh interface can use (in an homogeneous mesh) network coding, a mechanism that aims to increase the overall network throughput by fusing multiple packets in one transmission. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_NETWORK_CODING_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 38caaaae8a05..a4239c147bde 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -464,6 +464,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + /** + * @BATADV_ATTR_NETWORK_CODING_ENABLED: whether Network Coding (using + * some magic to send fewer wifi packets but still the same content) is + * enabled or not. + */ + BATADV_ATTR_NETWORK_CODING_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From 7b751b39f018a828a04692e199c044087102e96c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:28:02 +0100 Subject: batman-adv: Add orig_interval mesh genl configuration The OGM packets are transmitted every orig_interval milliseconds. This value can be changed using the configuration interface. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the u32 BATADV_ATTR_ORIG_INTERVAL attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a4239c147bde..6bedd4889c37 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -471,6 +471,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_NETWORK_CODING_ENABLED, + /** + * @BATADV_ATTR_ORIG_INTERVAL: defines the interval in milliseconds in + * which batman sends its protocol messages. 
+ */ + BATADV_ATTR_ORIG_INTERVAL, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From a108008290405545b43b9c7975344bc59af2341b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:30:04 +0100 Subject: batman-adv: Add elp_interval hardif genl configuration The ELP packets are transmitted every elp_interval milliseconds on a slave/hard-interface. This value can be changed using the configuration interface. The BATADV_CMD_SET_HARDIF/BATADV_CMD_GET_HARDIF commands allow to set/get the configuration of this feature using the u32 BATADV_ATTR_ELP_INTERVAL attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 6bedd4889c37..f966e497361b 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -477,6 +477,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_ORIG_INTERVAL, + /** + * @BATADV_ATTR_ELP_INTERVAL: defines the interval in milliseconds in + * which batman emits probing packets for neighbor sensing (ELP). + */ + BATADV_ATTR_ELP_INTERVAL, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From 9a182242f17c06fad620663e6fdf992e97661e66 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:31:23 +0100 Subject: batman-adv: Add throughput_override hardif genl configuration The B.A.T.M.A.N. V implementation tries to estimate the link throughput of an interface to an originator using different automatic methods. It is still possible to override the link throughput for all reachable originators via this interface. The BATADV_CMD_SET_HARDIF/BATADV_CMD_GET_HARDIF commands allow to set/get the configuration of this feature using the u32 BATADV_ATTR_THROUGHPUT_OVERRIDE attribute. The used unit is in 100 Kbit/s.
If the value is set to 0 then batman-adv will try to estimate the throughput by itself. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f966e497361b..305bf316dd03 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -483,6 +483,14 @@ enum batadv_nl_attrs { */ BATADV_ATTR_ELP_INTERVAL, + /** + * @BATADV_ATTR_THROUGHPUT_OVERRIDE: defines the throughput value to be + * used by B.A.T.M.A.N. V when estimating the link throughput using + * this interface. If the value is set to 0 then batman-adv will try to + * estimate the throughput by itself. + */ + BATADV_ATTR_THROUGHPUT_OVERRIDE, + /* add attributes above here, update the policy in netlink.c */ /** -- cgit v1.2.3 From 257eeded20b34219d5484cfc415b3e39093f37b8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 10 Feb 2019 14:24:59 +0200 Subject: net: Move all TC actions identifiers to one place Move all the TC identifiers to one place, to the same enum that defines the identifier of police action. This makes it easier to choose numbers for new actions since they are now defined in one place. We preserve the original values for binary compatibility. New IDs should be added inside the enum. Signed-off-by: Eli Cohen Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/uapi/linux/pkt_cls.h | 43 ++++++++++++++++++++++++++++--- include/uapi/linux/tc_act/tc_bpf.h | 2 -- include/uapi/linux/tc_act/tc_connmark.h | 2 -- include/uapi/linux/tc_act/tc_csum.h | 2 -- include/uapi/linux/tc_act/tc_gact.h | 1 - include/uapi/linux/tc_act/tc_ife.h | 1 - include/uapi/linux/tc_act/tc_ipt.h | 3 --- include/uapi/linux/tc_act/tc_mirred.h | 1 - include/uapi/linux/tc_act/tc_nat.h | 2 -- include/uapi/linux/tc_act/tc_pedit.h | 2 -- include/uapi/linux/tc_act/tc_sample.h | 2 -- include/uapi/linux/tc_act/tc_skbedit.h | 2 -- include/uapi/linux/tc_act/tc_skbmod.h | 2 -- include/uapi/linux/tc_act/tc_tunnel_key.h | 2 -- include/uapi/linux/tc_act/tc_vlan.h | 2 -- 15 files changed, 40 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 02ac251be8c4..7ab55f97e7c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -63,12 +63,49 @@ enum { #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) #define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN +/* These macros are put here for binary compatibility with userspace apps that + * make use of them. For kernel code and new userspace apps, use the TCA_ID_* + * versions. 
+ */ +#define TCA_ACT_GACT 5 +#define TCA_ACT_IPT 6 +#define TCA_ACT_PEDIT 7 +#define TCA_ACT_MIRRED 8 +#define TCA_ACT_NAT 9 +#define TCA_ACT_XT 10 +#define TCA_ACT_SKBEDIT 11 +#define TCA_ACT_VLAN 12 +#define TCA_ACT_BPF 13 +#define TCA_ACT_CONNMARK 14 +#define TCA_ACT_SKBMOD 15 +#define TCA_ACT_CSUM 16 +#define TCA_ACT_TUNNEL_KEY 17 +#define TCA_ACT_SIMP 22 +#define TCA_ACT_IFE 25 +#define TCA_ACT_SAMPLE 26 + /* Action type identifiers*/ enum { - TCA_ID_UNSPEC=0, - TCA_ID_POLICE=1, + TCA_ID_UNSPEC = 0, + TCA_ID_POLICE = 1, + TCA_ID_GACT = TCA_ACT_GACT, + TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_PEDIT = TCA_ACT_PEDIT, + TCA_ID_MIRRED = TCA_ACT_MIRRED, + TCA_ID_NAT = TCA_ACT_NAT, + TCA_ID_XT = TCA_ACT_XT, + TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT, + TCA_ID_VLAN = TCA_ACT_VLAN, + TCA_ID_BPF = TCA_ACT_BPF, + TCA_ID_CONNMARK = TCA_ACT_CONNMARK, + TCA_ID_SKBMOD = TCA_ACT_SKBMOD, + TCA_ID_CSUM = TCA_ACT_CSUM, + TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY, + TCA_ID_SIMP = TCA_ACT_SIMP, + TCA_ID_IFE = TCA_ACT_IFE, + TCA_ID_SAMPLE = TCA_ACT_SAMPLE, /* other actions go here */ - __TCA_ID_MAX=255 + __TCA_ID_MAX = 255 }; #define TCA_ID_MAX __TCA_ID_MAX diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 6e89a5df49a4..653c4f94f76e 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_BPF 13 - struct tc_act_bpf { tc_gen; }; diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h index 80caa47b1933..9f8f6f709feb 100644 --- a/include/uapi/linux/tc_act/tc_connmark.h +++ b/include/uapi/linux/tc_act/tc_connmark.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_CONNMARK 14 - struct tc_connmark { tc_gen; __u16 zone; diff --git a/include/uapi/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h index 0ecf4d29e2f3..94b2044929de 100644 --- a/include/uapi/linux/tc_act/tc_csum.h +++ b/include/uapi/linux/tc_act/tc_csum.h @@ -5,8 +5,6 @@ 
#include #include -#define TCA_ACT_CSUM 16 - enum { TCA_CSUM_UNSPEC, TCA_CSUM_PARMS, diff --git a/include/uapi/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h index 94273c3b81b0..37e5392e02c7 100644 --- a/include/uapi/linux/tc_act/tc_gact.h +++ b/include/uapi/linux/tc_act/tc_gact.h @@ -5,7 +5,6 @@ #include #include -#define TCA_ACT_GACT 5 struct tc_gact { tc_gen; diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 2f48490ef386..8c401f185675 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -6,7 +6,6 @@ #include #include -#define TCA_ACT_IFE 25 /* Flag bits for now just encoding/decoding; mutually exclusive */ #define IFE_ENCODE 1 #define IFE_DECODE 0 diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h index b743c8bddd13..c48d7da6750d 100644 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ b/include/uapi/linux/tc_act/tc_ipt.h @@ -4,9 +4,6 @@ #include -#define TCA_ACT_IPT 6 -#define TCA_ACT_XT 10 - enum { TCA_IPT_UNSPEC, TCA_IPT_TABLE, diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 5dd671cf5776..2500a0005d05 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -5,7 +5,6 @@ #include #include -#define TCA_ACT_MIRRED 8 #define TCA_EGRESS_REDIR 1 /* packet redirect to EGRESS*/ #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ diff --git a/include/uapi/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h index 086be842587b..21399c2c6130 100644 --- a/include/uapi/linux/tc_act/tc_nat.h +++ b/include/uapi/linux/tc_act/tc_nat.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_NAT 9 - enum { TCA_NAT_UNSPEC, TCA_NAT_PARMS, diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index 24ec792dacc1..f3e61b04fa01 100644 --- 
a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_PEDIT 7 - enum { TCA_PEDIT_UNSPEC, TCA_PEDIT_TM, diff --git a/include/uapi/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h index bd7e9f03abd2..fee1bcc20793 100644 --- a/include/uapi/linux/tc_act/tc_sample.h +++ b/include/uapi/linux/tc_act/tc_sample.h @@ -6,8 +6,6 @@ #include #include -#define TCA_ACT_SAMPLE 26 - struct tc_sample { tc_gen; }; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 6de6071ebed6..800e93377218 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -23,8 +23,6 @@ #include -#define TCA_ACT_SKBEDIT 11 - #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index 38c072f66f2f..c525b3503797 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_SKBMOD 15 - #define SKBMOD_F_DMAC 0x1 #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index be384d63e1b5..41c8b462c177 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -14,8 +14,6 @@ #include -#define TCA_ACT_TUNNEL_KEY 17 - #define TCA_TUNNEL_KEY_ACT_SET 1 #define TCA_TUNNEL_KEY_ACT_RELEASE 2 diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 0d7b5fd6605b..168995b54a70 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_VLAN 12 - #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 #define TCA_VLAN_ACT_MODIFY 3 -- cgit v1.2.3 From eddd2cf195d6fb5e4bbc91a0fe4be55110f559ab 
Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 10 Feb 2019 14:25:00 +0200 Subject: net: Change TCA_ACT_* to TCA_ID_* to match that of TCA_ID_POLICE Modify the kernel users of the TCA_ACT_* macros to use TCA_ID_*. For example, use TCA_ID_GACT instead of TCA_ACT_GACT. This will align with TCA_ID_POLICE and also differentiates these identifiers, used in struct tc_action_ops type field, from other macros starting with TCA_ACT_. To make things clearer, we name the enum defining the TCA_ID_* identifiers and also change the "type" field of struct tc_action to id. Signed-off-by: Eli Cohen Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7ab55f97e7c4..51a0496f78ea 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -85,7 +85,7 @@ enum { #define TCA_ACT_SAMPLE 26 /* Action type identifiers*/ -enum { +enum tca_id { TCA_ID_UNSPEC = 0, TCA_ID_POLICE = 1, TCA_ID_GACT = TCA_ACT_GACT, -- cgit v1.2.3 From 46f8bc92758c6259bcf945e9216098661c1587cd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:20 -0800 Subject: bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)" before accessing the fields in sock. For example, in __netdev_pick_tx: static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { /* ... */ struct sock *sk = skb->sk; if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); /* ... */ return queue_index; } This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff" where a few of the convert_ctx_access() in filter.c have already been accessing the skb->sk sock_common's fields, e.g. sock_ops_convert_ctx_access().
"__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier. Some of the fields in "bpf_sock" will not be directly accessible through the "__sk_buff->sk" pointer. It is limited by the new "bpf_sock_common_is_valid_access()". e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock are not allowed. The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)" can be used to get a sk with all accessible fields in "bpf_sock". This helper is added to both cg_skb and sched_(cls|act). int cg_skb_foo(struct __sk_buff *skb) { struct bpf_sock *sk; sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP) return 1; /* some_traffic_shaping(); */ return 1; } (1) The sk is read only (2) There is no new "struct bpf_sock_common" introduced. (3) Future kernel sock's members could be added to bpf_sock only instead of repeatedly adding at multiple places like currently in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc. (4) After "sk = skb->sk", the reg holding sk is in type PTR_TO_SOCK_COMMON_OR_NULL. (5) After bpf_sk_fullsock(), the return type will be in type PTR_TO_SOCKET_OR_NULL which is the same as the return type of bpf_sk_lookup_xxx(). However, bpf_sk_fullsock() does not take refcnt. The acquire_reference_state() is only depending on the return type now. To avoid it, a new is_acquire_function() is checked before calling acquire_reference_state(). (6) The WARN_ON in "release_reference_state()" is no longer an internal verifier bug. When reg->id is not found in state->refs[], it means the bpf_prog does something wrong like "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has never been acquired by calling "bpf_sk_fullsock(skb->sk)". A -EINVAL and a verbose are done instead of WARN_ON. A test is added to the test_verifier in a later patch.
Since the WARN_ON in "release_reference_state()" is no longer needed, "__release_reference_state()" is folded into "release_reference_state()" also. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1777fa0c61e4..5d79cba74ddc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2329,6 +2329,14 @@ union bpf_attr { * "**y**". * Return * 0 + * + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in bpf_sock can be accessed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2425,7 +2433,8 @@ union bpf_attr { FN(msg_pop_data), \ FN(rc_pointer_rel), \ FN(spin_lock), \ - FN(spin_unlock), + FN(spin_unlock), \ + FN(sk_fullsock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2545,6 +2554,7 @@ struct __sk_buff { __u64 tstamp; __u32 wire_len; __u32 gso_segs; + __bpf_md_ptr(struct bpf_sock *, sk); }; struct bpf_tunnel_key { -- cgit v1.2.3 From aa65d6960a98fc15a96ce361b26e9fd55c9bccc5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:21 -0800 Subject: bpf: Add state, dst_ip4, dst_ip6 and dst_port to bpf_sock This patch adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the bpf_sock. The userspace has already been using "state", e.g. inet_diag (ss -t) and getsockopt(TCP_INFO). This patch also allows narrow load on the following existing fields: "family", "type", "protocol" and "src_port". Unlike the IP addresses, the load offset is restricted to the first byte for them but it can be relaxed later if there is a use case.
This patch also folds __sock_filter_check_size() into bpf_sock_is_valid_access() since it is not called by any where else. All bpf_sock checking is in one place. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5d79cba74ddc..d8f91777c5b6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2606,15 +2606,14 @@ struct bpf_sock { __u32 protocol; __u32 mark; __u32 priority; - __u32 src_ip4; /* Allows 1,2,4-byte read. - * Stored in network byte order. - */ - __u32 src_ip6[4]; /* Allows 1,2,4-byte read. - * Stored in network byte order. - */ - __u32 src_port; /* Allows 4-byte read. - * Stored in host byte order - */ + /* IP address also allows 1 and 2 bytes access */ + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ + __u32 dst_port; /* network byte order */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; }; struct bpf_sock_tuple { -- cgit v1.2.3 From 655a51e536c09d15ffa3603b1b6fce2b45b85a1f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:24 -0800 Subject: bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock This patch adds a helper function BPF_FUNC_tcp_sock and it is currently available for cg_skb and sched_(cls|act): struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk); int cg_skb_foo(struct __sk_buff *skb) { struct bpf_tcp_sock *tp; struct bpf_sock *sk; __u32 snd_cwnd; sk = skb->sk; if (!sk) return 1; tp = bpf_tcp_sock(sk); if (!tp) return 1; snd_cwnd = tp->snd_cwnd; /* ... */ return 1; } A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide read-only access. bpf_tcp_sock has all the existing tcp_sock's fields that has already been exposed by the bpf_sock_ops. i.e. no new tcp_sock's fields are exposed in bpf.h. This helper returns a pointer to the tcp_sock. 
If it is not a tcp_sock or it cannot be traced back to a tcp_sock by sk_to_full_sk(), it returns NULL. Hence, the caller needs to check for NULL before accessing it. The current use case is to expose members from tcp_sock to allow a cg_skb_bpf_prog to provide per cgroup traffic policing/shaping. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d8f91777c5b6..25c8c0e62ecf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2337,6 +2337,15 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or NULL in * case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Return + * A **struct bpf_tcp_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2434,7 +2443,8 @@ union bpf_attr { FN(rc_pointer_rel), \ FN(spin_lock), \ FN(spin_unlock), \ - FN(sk_fullsock), + FN(sk_fullsock), \ + FN(tcp_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2616,6 +2626,45 @@ struct bpf_sock { __u32 state; }; +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. 
*/ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ +}; + struct bpf_sock_tuple { union { struct { -- cgit v1.2.3 From 1ec17dbd90f8b638f41ee650558609c1af63dfa0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 9 Feb 2019 13:35:52 +0300 Subject: inet_diag: fix reporting cgroup classid and fallback to priority Field idiag_ext in struct inet_diag_req_v2 used as bitmap of requested extensions has only 8 bits. Thus extensions starting from DCTCPINFO cannot be requested directly. Some of them are included in the response unconditionally or hook into some of lower 8 bits. Extension INET_DIAG_CLASS_ID has no way to request from the beginning. This patch bundles it with INET_DIAG_TCLASS (ipv6 tos), fixes space reservation, and documents behavior for other extensions. Also this patch adds fallback to reporting socket priority. This field is more widely used for traffic classification because ipv4 sockets automatically map TOS to priority and default qdisc pfifo_fast knows about that.
But priority could be changed via setsockopt SO_PRIORITY so INET_DIAG_TOS isn't enough for predicting class. Also cgroup2 obsoletes net_cls classid (it always zero), but we cannot reuse this field for reporting cgroup2 id because it is 64-bit (ino+gen). So, after this patch INET_DIAG_CLASS_ID will report socket priority for most common setup when net_cls isn't set and/or cgroup2 in use. Fixes: 0888e372c37f ("net: inet: diag: expose sockets cgroup classid") Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 14565d703291..e8baca85bac6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -137,15 +137,21 @@ enum { INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, - INET_DIAG_DCTCPINFO, - INET_DIAG_PROTOCOL, /* response attribute only */ + + /* + * Next extenstions cannot be requested in struct inet_diag_req_v2: + * its field idiag_ext has only 8 bits. + */ + + INET_DIAG_DCTCPINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_PROTOCOL, /* response attribute only */ INET_DIAG_SKV6ONLY, INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, - INET_DIAG_MARK, - INET_DIAG_BBRINFO, - INET_DIAG_CLASS_ID, + INET_DIAG_MARK, /* only with CAP_NET_ADMIN */ + INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; -- cgit v1.2.3 From d9a9ea94f748f47b1d75c6c5e33edcf74476c445 Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Mon, 7 Jan 2019 16:53:17 -0800 Subject: fuse: support clients that don't implement 'opendir' Allow filesystems to return ENOSYS from opendir, preventing the kernel from sending opendir and releasedir messages in the future. 
This avoids userspace transitions when filesystems don't need to keep track of state per directory handle. A new capability flag, FUSE_NO_OPENDIR_SUPPORT, parallels FUSE_NO_OPEN_SUPPORT, indicating the new semantics for returning ENOSYS from opendir. Signed-off-by: Chad Austin Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index b4967d48bfda..2ac598614a8f 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -122,6 +122,9 @@ * - add FOPEN_CACHE_DIR * - add FUSE_MAX_PAGES, add max_pages to init_out * - add FUSE_CACHE_SYMLINKS + * + * 7.29 + * - add FUSE_NO_OPENDIR_SUPPORT flag */ #ifndef _LINUX_FUSE_H @@ -157,7 +160,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 28 +#define FUSE_KERNEL_MINOR_VERSION 29 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -259,6 +262,7 @@ struct fuse_file_lock { * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages * FUSE_CACHE_SYMLINKS: cache READLINK responses + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -284,6 +288,7 @@ struct fuse_file_lock { #define FUSE_ABORT_ERROR (1 << 21) #define FUSE_MAX_PAGES (1 << 22) #define FUSE_CACHE_SYMLINKS (1 << 23) +#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From 7fd8afa8933a095a97995885740999f174e61b60 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:29 +0100 Subject: net: phy: Add generic support for 2.5GBaseT and 5GBaseT The 802.3bz specification, based on previous by the NBASET alliance, defines the 2.5GBaseT and 5GBaseT link modes for ethernet traffic on cat5e, cat6 
and cat7 cables. These mode integrate with the already defined C45 MDIO PMA/PMD registers set that added 10G support, by defining some previously reserved bits, and adding a new register (2.5G/5G Extended abilities). This commit adds the required definitions in include/uapi/linux/mdio.h to support these modes, and detect when a link-partner advertises them. It also adds support for these mode in the generic C45 PHY infrastructure. Signed-off-by: Maxime Chevallier Reviewed-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 0e012b168e4d..0a552061ff1c 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -45,6 +45,7 @@ #define MDIO_AN_ADVERTISE 16 /* AN advertising (base page) */ #define MDIO_AN_LPA 19 /* AN LP abilities (base page) */ #define MDIO_PCS_EEE_ABLE 20 /* EEE Capability register */ +#define MDIO_PMA_NG_EXTABLE 21 /* 2.5G/5G PMA/PMD extended ability */ #define MDIO_PCS_EEE_WK_ERR 22 /* EEE wake error counter */ #define MDIO_PHYXS_LNSTAT 24 /* PHY XGXS lane state */ #define MDIO_AN_EEE_ADV 60 /* EEE advertisement */ @@ -92,6 +93,10 @@ #define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00) /* 10PASS-TS/2BASE-TL */ #define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) +/* 2.5 Gb/s */ +#define MDIO_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) +/* 5 Gb/s */ +#define MDIO_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) /* Status register 1. 
*/ #define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ @@ -145,6 +150,8 @@ #define MDIO_PMA_CTRL2_1000BKX 0x000d /* 1000BASE-KX type */ #define MDIO_PMA_CTRL2_100BTX 0x000e /* 100BASE-TX type */ #define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ +#define MDIO_PMA_CTRL2_2_5GBT 0x0030 /* 2.5GBaseT type */ +#define MDIO_PMA_CTRL2_5GBT 0x0031 /* 5GBaseT type */ #define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ #define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ #define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ @@ -198,6 +205,7 @@ #define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ #define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ #define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ +#define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ /* PHY XGXS lane state register. */ #define MDIO_PHYXS_LNSTAT_SYNC0 0x0001 @@ -234,9 +242,13 @@ #define MDIO_PCS_10GBRT_STAT2_BER 0x3f00 /* AN 10GBASE-T control register. */ +#define MDIO_AN_10GBT_CTRL_ADV2_5G 0x0080 /* Advertise 2.5GBASE-T */ +#define MDIO_AN_10GBT_CTRL_ADV5G 0x0100 /* Advertise 5GBASE-T */ #define MDIO_AN_10GBT_CTRL_ADV10G 0x1000 /* Advertise 10GBASE-T */ /* AN 10GBASE-T status register. */ +#define MDIO_AN_10GBT_STAT_LP2_5G 0x0020 /* LP is 2.5GBT capable */ +#define MDIO_AN_10GBT_STAT_LP5G 0x0040 /* LP is 5GBT capable */ #define MDIO_AN_10GBT_STAT_LPTRR 0x0200 /* LP training reset req. */ #define MDIO_AN_10GBT_STAT_LPLTABLE 0x0400 /* LP loop timing ability */ #define MDIO_AN_10GBT_STAT_LP10G 0x0800 /* LP is 10GBT capable */ @@ -265,6 +277,10 @@ #define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ #define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ +/* 2.5G/5G Extended abilities register. */ +#define MDIO_PMA_NG_EXTABLE_2_5GBT 0x0001 /* 2.5GBASET ability */ +#define MDIO_PMA_NG_EXTABLE_5GBT 0x0002 /* 5GBASET ability */ + /* LASI RX_ALARM control/status registers. 
*/ #define MDIO_PMA_LASI_RX_PHYXSLFLT 0x0001 /* PHY XS RX local fault */ #define MDIO_PMA_LASI_RX_PCSLFLT 0x0008 /* PCS RX local fault */ -- cgit v1.2.3 From 3e0bd37ce0e4a574df6d87a901e13bcb46e10301 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:35 -0800 Subject: bpf: add plumbing for BPF_LWT_ENCAP_IP in bpf_lwt_push_encap This patch adds all needed plumbing in preparation to allowing bpf programs to do IP encapping via bpf_lwt_push_encap. Actual implementation is added in the next patch in the patchset. Of note: - bpf_lwt_push_encap can now be called from BPF_PROG_TYPE_LWT_XMIT prog types in addition to BPF_PROG_TYPE_LWT_IN; - if the skb being encapped has GSO set, encapsulation is limited to IPIP/IP+GRE/IP+GUE (both IPv4 and IPv6); - as route lookups are different for ingress vs egress, the single external bpf_lwt_push_encap BPF helper is routed internally to either bpf_lwt_in_push_encap or bpf_lwt_xmit_push_encap BPF_CALLs, depending on prog type. v8 changes: fixed a typo. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25c8c0e62ecf..bcdd2474eee7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2016,6 +2016,19 @@ union bpf_attr { * Only works if *skb* contains an IPv6 packet. Insert a * Segment Routing Header (**struct ipv6_sr_hdr**) inside * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to LWT_BPF_MAX_HEADROOM total + * bytes in all prepended headers. Please note that + * if skb_is_gso(skb) is true, no more than two headers + * can be prepended, and the inner header, if present, + * should be either GRE or UDP/GUE. 
+ * + * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of + * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called + * by bpf programs of types BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2517,7 +2530,8 @@ enum bpf_hdr_start_off { /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6, - BPF_LWT_ENCAP_SEG6_INLINE + BPF_LWT_ENCAP_SEG6_INLINE, + BPF_LWT_ENCAP_IP, }; #define __bpf_md_ptr(type, name) \ @@ -2606,7 +2620,15 @@ enum bpf_ret_code { BPF_DROP = 2, /* 3-6 reserved */ BPF_REDIRECT = 7, - /* >127 are reserved for prog type specific return codes */ + /* >127 are reserved for prog type specific return codes. + * + * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been + * changed and should be routed based on its new L3 header. + * (This is an L3 redirect, as opposed to L2 redirect + * represented by BPF_REDIRECT above). + */ + BPF_LWT_REROUTE = 128, }; struct bpf_sock { -- cgit v1.2.3 From ca5e9aba753ed15d173c7a7b88e4d402b7ca8121 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Tue, 12 Feb 2019 19:26:03 -0800 Subject: time: Add time_types.h sys/time.h is the mandated include for many time related defines. However, linux/time.h overlaps sys/time.h significantly and this makes including both from userspace or one from the other impossible. This also means that userspace can get away with including sys/time.h whenever it needs linux/time.h and this is what's been happening in the user world usually. But, we have new data types that we plan to use in the uapi time interfaces also defined in the linux/time.h. But, we are unable to use these types when sys/time.h is included. Hence, move the new types to a new header, time_types.h. 
We intend to eventually have all the uapi defines that the kernel uses defined in this header. Note that the plan is to replace uapi interfaces with timeval to use __kernel_old_timeval, timespec to use __kernel_old_timespec etc. Reported-by: Ran Rozenstein Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") Signed-off-by: Deepa Dinamani Signed-off-by: David S. Miller --- include/uapi/linux/time.h | 36 +----------------------------------- include/uapi/linux/time_types.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 35 deletions(-) create mode 100644 include/uapi/linux/time_types.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index b8ad1b86b942..958932effc5e 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -3,7 +3,7 @@ #define _UAPI_LINUX_TIME_H #include - +#include #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC @@ -23,7 +23,6 @@ struct timezone { int tz_dsttime; /* type of dst correction */ }; - /* * Names of the interval timers, and structure * defining a timer setting: @@ -42,39 +41,6 @@ struct itimerval { struct timeval it_value; /* current value */ }; -#ifndef __kernel_timespec -struct __kernel_timespec { - __kernel_time64_t tv_sec; /* seconds */ - long long tv_nsec; /* nanoseconds */ -}; -#endif - -#ifndef __kernel_itimerspec -struct __kernel_itimerspec { - struct __kernel_timespec it_interval; /* timer period */ - struct __kernel_timespec it_value; /* timer expiration */ -}; -#endif - -/* - * legacy timeval structure, only embedded in structures that - * traditionally used 'timeval' to pass time intervals (not absolute - * times). Do not add new users. If user space fails to compile - * here, this is probably because it is not y2038 safe and needs to - * be changed to use another interface. 
- */ -#ifndef __kernel_old_timeval -struct __kernel_old_timeval { - __kernel_long_t tv_sec; - __kernel_long_t tv_usec; -}; -#endif - -struct __kernel_sock_timeval { - __s64 tv_sec; - __s64 tv_usec; -}; - /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ diff --git a/include/uapi/linux/time_types.h b/include/uapi/linux/time_types.h new file mode 100644 index 000000000000..459070c61d47 --- /dev/null +++ b/include/uapi/linux/time_types.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_TIME_TYPES_H +#define _UAPI_LINUX_TIME_TYPES_H + +#include + +#ifndef __kernel_timespec +struct __kernel_timespec { + __kernel_time64_t tv_sec; /* seconds */ + long long tv_nsec; /* nanoseconds */ +}; +#endif + +#ifndef __kernel_itimerspec +struct __kernel_itimerspec { + struct __kernel_timespec it_interval; /* timer period */ + struct __kernel_timespec it_value; /* timer expiration */ +}; +#endif + +/* + * legacy timeval structure, only embedded in structures that + * traditionally used 'timeval' to pass time intervals (not absolute + * times). Do not add new users. If user space fails to compile + * here, this is probably because it is not y2038 safe and needs to + * be changed to use another interface. + */ +#ifndef __kernel_old_timeval +struct __kernel_old_timeval { + __kernel_long_t tv_sec; + __kernel_long_t tv_usec; +}; +#endif + +struct __kernel_sock_timeval { + __s64 tv_sec; + __s64 tv_usec; +}; + +#endif /* _UAPI_LINUX_TIME_TYPES_H */ -- cgit v1.2.3 From 460a2db0273efe15488a3e4b88da2678eb403178 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Tue, 12 Feb 2019 19:26:04 -0800 Subject: errqueue.h: Include time_types.h Now that we have a separate header for struct __kernel_timespec, include it directly without relying on userspace to do it. Reported-by: Ran Rozenstein Signed-off-by: Deepa Dinamani Signed-off-by: David S. 
Miller --- include/uapi/linux/errqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index d955b9e32288..28491dac074b 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -3,6 +3,7 @@ #define _UAPI_LINUX_ERRQUEUE_H #include +#include struct sock_extended_err { __u32 ee_errno; -- cgit v1.2.3 From 76726ccb7f461c83040e7082cf95fe1dea2afd1f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Feb 2019 13:40:44 -0800 Subject: devlink: add flash update command Add devlink flash update command. Advanced NICs have firmware stored in flash and often cryptographically secured. Updating that flash is handled by management firmware. Ethtool has a flash update command which served us well, however, it has two shortcomings: - it takes rtnl_lock unnecessarily - really flash update has nothing to do with networking, so using a networking device as a handle is suboptimal, which leads us to the second one: - it requires a functioning netdev - in case device enters an error state and can't spawn a netdev (e.g. communication with the device fails) there is no netdev to use as a handle for flashing. Devlink already has the ability to report the firmware versions, now with the ability to update the firmware/flash we will be able to recover devices in bad state. To enable updates of sub-components of the FW allow passing component name. This name should correspond to one of the versions reported in devlink info. v1: - replace target id with component name (Jiri). Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 72d9f7c89190..53de8802a000 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -103,6 +103,8 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + DEVLINK_CMD_FLASH_UPDATE, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -326,6 +328,10 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ + + DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, /* string */ + DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 746c9398f5ac2b3f5730da4ed09e99ef4bb50b4a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 8 Feb 2019 01:02:55 -0500 Subject: arch: move common mmap flags to linux/mman.h Now that we have 3 mmap flags shared by all architectures, let's move them into the common header. This will help discourage future architectures from duplicating code. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Arnd Bergmann --- include/uapi/linux/mman.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index d0f515d53299..fc1a64c3447b 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -12,6 +12,10 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ + /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page * size other than the default is desired. See hugetlb_encode.h. -- cgit v1.2.3 From a7fe4ca72b1fe7877de5672640d0b4e023d0fdca Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Thu, 7 Feb 2019 22:18:43 -0500 Subject: media: v4l: Add 32-bit packed YUV formats The formats added in this patch include: V4L2_PIX_FMT_AYUV32 V4L2_PIX_FMT_XYUV32 V4L2_PIX_FMT_VUYA32 V4L2_PIX_FMT_VUYX32 These formats enable the transmission of alpha channel data to other drivers and userspace applications in addition to YUV data. For example, buffers generated by drivers in one of these formats can be used by the Weston compositor to display as a texture or flipped directly onto the overlay planes with the help of a DRM driver.
Signed-off-by: Vivek Kasireddy Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9a920f071ff9..1db220da3bcc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -562,6 +562,10 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV555 v4l2_fourcc('Y', 'U', 'V', 'O') /* 16 YUV-5-5-5 */ #define V4L2_PIX_FMT_YUV565 v4l2_fourcc('Y', 'U', 'V', 'P') /* 16 YUV-5-6-5 */ #define V4L2_PIX_FMT_YUV32 v4l2_fourcc('Y', 'U', 'V', '4') /* 32 YUV-8-8-8-8 */ +#define V4L2_PIX_FMT_AYUV32 v4l2_fourcc('A', 'Y', 'U', 'V') /* 32 AYUV-8-8-8-8 */ +#define V4L2_PIX_FMT_XYUV32 v4l2_fourcc('X', 'Y', 'U', 'V') /* 32 XYUV-8-8-8-8 */ +#define V4L2_PIX_FMT_VUYA32 v4l2_fourcc('V', 'U', 'Y', 'A') /* 32 VUYA-8-8-8-8 */ +#define V4L2_PIX_FMT_VUYX32 v4l2_fourcc('V', 'U', 'Y', 'X') /* 32 VUYX-8-8-8-8 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ #define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ -- cgit v1.2.3 From 721074b03411327e7bf41555d4cc7c18f49313f7 Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Thu, 17 Jan 2019 03:50:13 -0500 Subject: media: rc: rcmm decoder and encoder media: add support for RCMM infrared remote controls. 
Signed-off-by: Patrick Lerda Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 6b319581882f..45fcbf99d72e 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -192,6 +192,9 @@ struct lirc_scancode { * @RC_PROTO_XMP: XMP protocol * @RC_PROTO_CEC: CEC protocol * @RC_PROTO_IMON: iMon Pad protocol + * @RC_PROTO_RCMM12: RC-MM protocol 12 bits + * @RC_PROTO_RCMM24: RC-MM protocol 24 bits + * @RC_PROTO_RCMM32: RC-MM protocol 32 bits */ enum rc_proto { RC_PROTO_UNKNOWN = 0, @@ -218,6 +221,9 @@ enum rc_proto { RC_PROTO_XMP = 21, RC_PROTO_CEC = 22, RC_PROTO_IMON = 23, + RC_PROTO_RCMM12 = 24, + RC_PROTO_RCMM24 = 25, + RC_PROTO_RCMM32 = 26, }; #endif -- cgit v1.2.3 From 2736d94f351b92749c07efef01f7c10548e39ad8 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jan 2019 10:50:10 +0200 Subject: ethtool: Added support for 50Gbps per lane link modes Added support for 50Gbps per lane link modes. Define various 50G, 100G and 200G link modes using it. 
Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/uapi/linux/ethtool.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 17be76aeb468..378c52308d89 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1453,6 +1453,21 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT = 52, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT = 53, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT = 54, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT = 55, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT = 56, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT = 57, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT = 58, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT = 59, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT = 61, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT = 62, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT = 63, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. 
Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1461,7 +1476,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + = ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ @@ -1569,6 +1584,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_50000 50000 #define SPEED_56000 56000 #define SPEED_100000 100000 +#define SPEED_200000 200000 #define SPEED_UNKNOWN -1 -- cgit v1.2.3 From fadccd8fc2d06cf7fd222245d7e04b00fae946cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Feb 2019 09:37:13 +0100 Subject: nvme_ioctl.h: remove duplicate GPL boilerplate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a SPDX header, so no need to duplicate the information. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/uapi/linux/nvme_ioctl.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 6e74b1eaf541..1c215ea1798e 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -2,15 +2,6 @@ /* * Definitions for the NVM Express ioctl interface * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details.
*/ #ifndef _UAPI_LINUX_NVME_IOCTL_H -- cgit v1.2.3 From 61697a6abd24acba941359c6268a94f4afe4a53d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Jan 2019 14:19:26 -0500 Subject: dm: eliminate 'split_discard_bios' flag from DM target interface There is no need to have DM core split discards on behalf of a DM target now that blk_queue_split() handles splitting discards based on the queue_limits. A DM target just needs to set max_discard_sectors, discard_granularity, etc, in queue_limits. Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index d1e49514977b..f396a82dfd3e 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -270,9 +270,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 39 +#define DM_VERSION_MINOR 40 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2018-04-03)" +#define DM_VERSION_EXTRA "-ioctl (2019-01-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 54719527fd06e80fce52b98537414035cd21e8d4 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 21 Feb 2019 14:12:01 +0200 Subject: devlink: Rename devlink health attributes Rename devlink health attributes to better reflect the attributes' use. Add a COUNT suffix to the error counter attribute and the recovery counter attribute. Fixes: 7afe335a8bed ("devlink: Add health get command") Signed-off-by: Aya Levin Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/uapi/linux/devlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 53de8802a000..5bb4ea67d84f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -323,8 +323,8 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ - DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ -- cgit v1.2.3 From ca8d4794f669e721fb5198f6d142e42dd8080239 Mon Sep 17 00:00:00 2001 From: Callum Sinclair Date: Mon, 18 Feb 2019 10:07:52 +1300 Subject: ipmr: ip6mr: Create new sockopt to clear mfc cache or vifs Currently the only way to clear the forwarding cache was to delete the entries one by one using the MRT_DEL_MFC socket option or to destroy and recreate the socket. Create a new socket option which with the use of optional flags can clear any combination of multicast entries (static or not static) and multicast vifs (static or not static). Calling the new socket option MRT_FLUSH with the flags MRT_FLUSH_MFC and MRT_FLUSH_VIFS will clear all entries and vifs on the socket except for static entries. Signed-off-by: Callum Sinclair Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/mroute.h | 9 ++++++++- include/uapi/linux/mroute6.h | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 5d37a9ccce63..11c8c1fc1124 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -28,12 +28,19 @@ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ #define MRT_ADD_MFC_PROXY (MRT_BASE+10) /* Add a (*,*|G) mfc entry */ #define MRT_DEL_MFC_PROXY (MRT_BASE+11) /* Del a (*,*|G) mfc entry */ -#define MRT_MAX (MRT_BASE+11) +#define MRT_FLUSH (MRT_BASE+12) /* Flush all mfc entries and/or vifs */ +#define MRT_MAX (MRT_BASE+12) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) +/* MRT_FLUSH optional flags */ +#define MRT_FLUSH_MFC 1 /* Flush multicast entries */ +#define MRT_FLUSH_MFC_STATIC 2 /* Flush static multicast entries */ +#define MRT_FLUSH_VIFS 4 /* Flush multicast vifs */ +#define MRT_FLUSH_VIFS_STATIC 8 /* Flush static multicast vifs */ + #define MAXVIFS 32 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 9999cc006390..c36177a86516 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -31,12 +31,19 @@ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ #define MRT6_ADD_MFC_PROXY (MRT6_BASE+10) /* Add a (*,*|G) mfc entry */ #define MRT6_DEL_MFC_PROXY (MRT6_BASE+11) /* Del a (*,*|G) mfc entry */ -#define MRT6_MAX (MRT6_BASE+11) +#define MRT6_FLUSH (MRT6_BASE+12) /* Flush all mfc entries and/or vifs */ +#define MRT6_MAX (MRT6_BASE+12) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) +/* MRT6_FLUSH optional 
flags */ +#define MRT6_FLUSH_MFC 1 /* Flush multicast entries */ +#define MRT6_FLUSH_MFC_STATIC 2 /* Flush static multicast entries */ +#define MRT6_FLUSH_MIFS 4 /* Flushing multicast vifs */ +#define MRT6_FLUSH_MIFS_STATIC 8 /* Flush static multicast vifs */ + #define MAXMIFS 32 typedef unsigned long mifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short mifi_t; -- cgit v1.2.3 From e728fdf0628971d43cb4e48860defc6e8a553761 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Feb 2019 19:25:59 +0100 Subject: net: phy: improve definition of __ETHTOOL_LINK_MODE_MASK_NBITS The way to define __ETHTOOL_LINK_MODE_MASK_NBITS seems to be overly complicated, go with a standard approach instead. Whilst we're at it, move the comment to the right place. v2: - rebased Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 378c52308d89..3652b239dad1 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1432,6 +1432,13 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, + + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* + * macro for bits > 31. The only way to use indices > 31 is to + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. 
+ */ + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, @@ -1469,14 +1476,8 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65, ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66, - /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit - * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* - * macro for bits > 31. The only way to use indices > 31 is to - * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. - */ - - __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, + /* must be last entry */ + __ETHTOOL_LINK_MODE_MASK_NBITS }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit v1.2.3 From 228a73abde5c04428678e917b271f8526cfd90ed Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 4 Jan 2019 13:31:54 +0800 Subject: btrfs: introduce new ioctl to unregister a btrfs device Support for a new command that can be used eg. as a command $ btrfs device scan --forget [dev] (the final name may change though) to undo the effects of 'btrfs device scan [dev]'. For this purpose this patch proposes to use ioctl #5 as it was empty and is next to the SCAN ioctl. The new ioctl BTRFS_IOC_FORGET_DEV works only on the control device (/dev/btrfs-control) to unregister one or all devices, devices that are not mounted. The argument is struct btrfs_ioctl_vol_args, ::name specifies the device path. To unregister all devices, the path is an empty string. Again, the devices are removed only if they aren't part of a mounted filesystem. This new ioctl provides: - release of unwanted btrfs_fs_devices and btrfs_devices structures from memory if the device is not going to be mounted - ability to mount filesystem in degraded mode, when one device is corrupted like in split brain raid1 - running test cases which would require reloading the kernel module but this is not possible eg.
due to mounted filesystem or built-in Signed-off-by: Anand Jain Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index e0763bc4158e..c195896d478f 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -837,6 +837,8 @@ enum btrfs_err_code { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \ + struct btrfs_ioctl_vol_args) /* trans start and trans end are dangerous, and only for * use by applications that know how to avoid the * resulting deadlocks -- cgit v1.2.3 From 3f7ae5f3dc5295ac17d6521130ed8a8f8a723fbf Mon Sep 17 00:00:00 2001 From: "Mohit P. Tahiliani" Date: Tue, 26 Feb 2019 00:39:59 +0530 Subject: net: sched: pie: add more cases to auto-tune alpha and beta The current implementation scales the local alpha and beta variables in the calculate_probability function by the same amount for all values of drop probability below 1%. RFC 8033 suggests using additional cases for auto-tuning alpha and beta when the drop probability is less than 1%. In order to add more auto-tuning cases, MAX_PROB must be scaled by u64 instead of u32 to prevent underflow when scaling the local alpha and beta variables in the calculate_probability function. Signed-off-by: Mohit P. Tahiliani Signed-off-by: Dhaval Khandla Signed-off-by: Hrishikesh Hiraskar Signed-off-by: Manish Kumar B Signed-off-by: Sachin D. Patil Signed-off-by: Leslie Monis Acked-by: Dave Taht Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc..1eb572ef3f27 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -954,7 +954,7 @@ enum { #define TCA_PIE_MAX (__TCA_PIE_MAX - 1) struct tc_pie_xstats { - __u32 prob; /* current probability */ + __u64 prob; /* current probability */ __u32 delay; /* current delay in ms */ __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ __u32 packets_in; /* total number of packets enqueued */ -- cgit v1.2.3 From e5567f5f67621877726f99be040af9fbedda37dc Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:04:51 -0800 Subject: PCI/ATS: Add pci_prg_resp_pasid_required() interface. Return the PRG Response PASID Required bit in the Page Request Status Register. As per PCIe spec r4.0, sec 10.5.2.3, if this bit is Set, the device expects a PASID TLP Prefix on PRG Response Messages when the corresponding Page Requests had a PASID TLP Prefix. If Clear, the device does not expect PASID TLP Prefixes on any PRG Response Message, and the device behavior is undefined if the device receives a PRG Response Message with a PASID TLP Prefix. Also the device behavior is undefined if this bit is Set and the device receives a PRG Response Message with no PASID TLP Prefix when the corresponding Page Requests had a PASID TLP Prefix. This function will be used by drivers like IOMMU, if it is required to check the status of the PRG Response PASID Required bit before enabling the PASID support of the device. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index e1e9888c85e6..898be572b010 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -880,6 +880,7 @@ #define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ #define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ #define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ +#define PCI_PRI_STATUS_PASID 0x8000 /* PRG Response PASID Required */ #define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16 -- cgit v1.2.3 From 8c938ddc6df3bbe72809db1be6c9f3af83f5d7a9 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:06:09 -0800 Subject: PCI/ATS: Add pci_ats_page_aligned() interface Return the Page Aligned Request bit in the ATS Capability Register. As per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit is set, it indicates the Untranslated Addresses generated by the device are always aligned to a 4096 byte boundary. An IOMMU that can only translate page-aligned addresses can only be used with devices that always produce aligned Untranslated Addresses. This interface will be used by drivers for such IOMMUs to determine whether devices can use the ATS service. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 898be572b010..5c98133f2c94 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -866,6 +866,7 @@ #define PCI_ATS_CAP 0x04 /* ATS Capability Register */ #define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ #define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CAP_PAGE_ALIGNED 0x0020 /* Page Aligned Request */ #define PCI_ATS_CTRL 0x06 /* ATS Control Register */ #define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ #define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ -- cgit v1.2.3 From 5f8f8b93aeb8371c54af08bece2bd04bc2d48707 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Feb 2019 14:28:40 -0800 Subject: bpf: expose program stats via bpf_prog_info Return bpf program run_time_ns and run_cnt via bpf_prog_info Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bcdd2474eee7..2e308e90ffea 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2813,6 +2813,8 @@ struct bpf_prog_info { __u32 jited_line_info_rec_size; __u32 nr_prog_tags; __aligned_u64 prog_tags; + __u64 run_time_ns; + __u64 run_cnt; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From 2b188cc1bb857a9d4701ae59aa7768b5124e262e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 7 Jan 2019 10:46:33 -0700 Subject: Add io_uring IO interface The submission queue (SQ) and completion queue (CQ) rings are shared between the 
application and the kernel. This eliminates the need to copy data back and forth to submit and complete IO. IO submissions use the io_uring_sqe data structure, and completions are generated in the form of io_uring_cqe data structures. The SQ ring is an index into the io_uring_sqe array, which makes it possible to submit a batch of IOs without them being contiguous in the ring. The CQ ring is always contiguous, as completion events are inherently unordered, and hence any io_uring_cqe entry can point back to an arbitrary submission. Two new system calls are added for this: io_uring_setup(entries, params) Sets up an io_uring instance for doing async IO. On success, returns a file descriptor that the application can mmap to gain access to the SQ ring, CQ ring, and io_uring_sqes. io_uring_enter(fd, to_submit, min_complete, flags, sigset, sigsetsize) Initiates IO against the rings mapped to this fd, or waits for them to complete, or both. The behavior is controlled by the parameters passed in. If 'to_submit' is non-zero, then we'll try and submit new IO. If IORING_ENTER_GETEVENTS is set, the kernel will wait for 'min_complete' events, if they aren't already available. It's valid to set IORING_ENTER_GETEVENTS and 'min_complete' == 0 at the same time, this allows the kernel to return already completed events without waiting for them. This is useful only for polling, as for IRQ driven IO, the application can just check the CQ ring without entering the kernel. With this setup, it's possible to do async IO with a single system call. Future developments will enable polled IO with this interface, and polled submission as well. The latter will enable an application to do IO without doing ANY system calls at all. For IRQ driven IO, an application only needs to enter the kernel for completions if it wants to wait for them to occur. Each io_uring is backed by a workqueue, to support buffered async IO as well. 
We will only punt to an async context if the command would need to wait for IO on the device side. Any data that can be accessed directly in the page cache is done inline. This avoids the slowness issue of usual threadpools, since cached data is accessed as quickly as a sync interface. Sample application: http://git.kernel.dk/cgit/fio/plain/t/io_uring.c Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 95 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 include/uapi/linux/io_uring.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h new file mode 100644 index 000000000000..ac692823d6f4 --- /dev/null +++ b/include/uapi/linux/io_uring.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Header file for the io_uring interface. + * + * Copyright (C) 2019 Jens Axboe + * Copyright (C) 2019 Christoph Hellwig + */ +#ifndef LINUX_IO_URING_H +#define LINUX_IO_URING_H + +#include +#include + +/* + * IO submission data structure (Submission Queue Entry) + */ +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* as of now unused */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + __u64 off; /* offset into file */ + __u64 addr; /* pointer to buffer or iovecs */ + __u32 len; /* buffer size or number of iovecs */ + union { + __kernel_rwf_t rw_flags; + __u32 __resv; + }; + __u64 user_data; /* data to be passed back at completion time */ + __u64 __pad2[3]; +}; + +#define IORING_OP_NOP 0 +#define IORING_OP_READV 1 +#define IORING_OP_WRITEV 2 + +/* + * IO completion data structure (Completion Queue Entry) + */ +struct io_uring_cqe { + __u64 user_data; /* sqe->data submission passed back */ + __s32 res; /* result code for this event */ + __u32 flags; +}; + +/* + * Magic offsets for the application to mmap the data it needs + 
*/ +#define IORING_OFF_SQ_RING 0ULL +#define IORING_OFF_CQ_RING 0x8000000ULL +#define IORING_OFF_SQES 0x10000000ULL + +/* + * Filled with the offset for mmap(2) + */ +struct io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 resv2; +}; + +struct io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u64 resv[2]; +}; + +/* + * io_uring_enter(2) flags + */ +#define IORING_ENTER_GETEVENTS (1U << 0) + +/* + * Passed in for io_uring_setup(2). Copied back with updated info on success + */ +struct io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 resv[7]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +#endif -- cgit v1.2.3 From c992fe2925d776be066d9f6cc13f9ea11d78b657 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Jan 2019 09:43:02 -0700 Subject: io_uring: add fsync support Add a new fsync opcode, which either syncs a range if one is passed, or the whole file if the offset and length fields are both cleared to zero. A flag is provided to use fdatasync semantics, that is only force out metadata which is required to retrieve the file data, but not others like metadata. 
Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ac692823d6f4..4589d56d0b68 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -24,7 +24,7 @@ struct io_uring_sqe { __u32 len; /* buffer size or number of iovecs */ union { __kernel_rwf_t rw_flags; - __u32 __resv; + __u32 fsync_flags; }; __u64 user_data; /* data to be passed back at completion time */ __u64 __pad2[3]; @@ -33,6 +33,12 @@ struct io_uring_sqe { #define IORING_OP_NOP 0 #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 +#define IORING_OP_FSYNC 3 + +/* + * sqe->fsync_flags + */ +#define IORING_FSYNC_DATASYNC (1U << 0) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From def596e9557c91d9846fc4d84d26f2c564644416 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jan 2019 08:59:42 -0700 Subject: io_uring: support for IO polling Add support for a polled io_uring instance. When a read or write is submitted to a polled io_uring, the application must poll for completions on the CQ ring through io_uring_enter(2). Polled IO may not generate IRQ completions, hence they need to be actively found by the application itself. To use polling, io_uring_setup() must be used with the IORING_SETUP_IOPOLL flag being set. It is illegal to mix and match polled and non-polled IO on an io_uring. 
Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4589d56d0b68..5c457ea396e6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -30,6 +30,11 @@ struct io_uring_sqe { __u64 __pad2[3]; }; +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ + #define IORING_OP_NOP 0 #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 -- cgit v1.2.3 From edafccee56ff31678a091ddb7219aba9b28bc3cb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jan 2019 09:16:05 -0700 Subject: io_uring: add support for pre-mapped user IO buffers If we have fixed user buffers, we can map them into the kernel when we setup the io_uring. That avoids the need to do get_user_pages() for each and every IO. To utilize this feature, the application must call io_uring_register() after having setup an io_uring instance, passing in IORING_REGISTER_BUFFERS as the opcode. The argument must be a pointer to an iovec array, and the nr_args should contain how many iovecs the application wishes to map. If successful, these buffers are now mapped into the kernel, eligible for IO. To use these fixed buffers, the application must use the IORING_OP_READ_FIXED and IORING_OP_WRITE_FIXED opcodes, and then set sqe->buf_index to the desired buffer index. sqe->addr..sqe->addr+sqe->len must point to somewhere inside the indexed buffer. The application may register buffers throughout the lifetime of the io_uring instance. It can call io_uring_register() with IORING_UNREGISTER_BUFFERS as the opcode to unregister the current set of buffers, and then register a new set. The application need not unregister buffers explicitly before shutting down the io_uring instance.
It's perfectly valid to setup a larger buffer, and then sometimes only use parts of it for an IO. As long as the range is within the originally mapped region, it will work just fine. For now, buffers must not be file backed. If file backed buffers are passed in, the registration will fail with -1/EOPNOTSUPP. This restriction may be relaxed in the future. RLIMIT_MEMLOCK is used to check how much memory we can pin. A somewhat arbitrary 1G per buffer size is also imposed. Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5c457ea396e6..cf28f7a11f12 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -27,7 +27,10 @@ struct io_uring_sqe { __u32 fsync_flags; }; __u64 user_data; /* data to be passed back at completion time */ - __u64 __pad2[3]; + union { + __u16 buf_index; /* index into fixed buffers, if used */ + __u64 __pad2[3]; + }; }; /* @@ -39,6 +42,8 @@ struct io_uring_sqe { #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 #define IORING_OP_FSYNC 3 +#define IORING_OP_READ_FIXED 4 +#define IORING_OP_WRITE_FIXED 5 /* * sqe->fsync_flags @@ -103,4 +108,10 @@ struct io_uring_params { struct io_cqring_offsets cq_off; }; +/* + * io_uring_register(2) opcodes and arguments + */ +#define IORING_REGISTER_BUFFERS 0 +#define IORING_UNREGISTER_BUFFERS 1 + #endif -- cgit v1.2.3 From 6b06314c47e141031be043539900d80d2c7ba10f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Jan 2019 22:13:58 -0700 Subject: io_uring: add file set registration We normally have to fget/fput for each IO we do on a file. Even with the batching we do, the cost of the atomic inc/dec of the file usage count adds up. This adds IORING_REGISTER_FILES, and IORING_UNREGISTER_FILES opcodes for the io_uring_register(2) system call. 
The arguments passed in must be an array of __s32 holding file descriptors, and nr_args should hold the number of file descriptors the application wishes to pin for the duration of the io_uring instance (or until IORING_UNREGISTER_FILES is called). When used, the application must set IOSQE_FIXED_FILE in the sqe->flags member. Then, instead of setting sqe->fd to the real fd, it sets sqe->fd to the index in the array passed in to IORING_REGISTER_FILES. Files are automatically unregistered when the io_uring instance is torn down. An application need only unregister if it wishes to register a new set of fds. Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cf28f7a11f12..6257478d55e9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -16,7 +16,7 @@ */ struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ - __u8 flags; /* as of now unused */ + __u8 flags; /* IOSQE_ flags */ __u16 ioprio; /* ioprio for the request */ __s32 fd; /* file descriptor to do IO on */ __u64 off; /* offset into file */ @@ -33,6 +33,11 @@ struct io_uring_sqe { }; }; +/* + * sqe->flags + */ +#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ + /* * io_uring_setup() flags */ @@ -113,5 +118,7 @@ struct io_uring_params { */ #define IORING_REGISTER_BUFFERS 0 #define IORING_UNREGISTER_BUFFERS 1 +#define IORING_REGISTER_FILES 2 +#define IORING_UNREGISTER_FILES 3 #endif -- cgit v1.2.3 From 6c271ce2f1d572f7fa225700a13cfe7ced492434 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Jan 2019 11:22:30 -0700 Subject: io_uring: add submission polling This enables an application to do IO, without ever entering the kernel. 
By using the SQ ring to fill in new sqes and watching for completions on the CQ ring, we can submit and reap IOs without doing a single system call. The kernel side thread will poll for new submissions, and in case of HIPRI/polled IO, it'll also poll for completions. By default, we allow 1 second of active spinning. This can be changed by passing in a different grace period at io_uring_register(2) time. If the thread exceeds this idle time without having any work to do, it will set: sq_ring->flags |= IORING_SQ_NEED_WAKEUP. The application will have to call io_uring_enter() to start things back up again. If IO is kept busy, that will never be needed. Basically an application that has this feature enabled will guard its io_uring_enter(2) call with: read_barrier(); if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP) io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP); instead of calling it unconditionally. It's mandatory to use fixed files with this feature. Failure to do so will result in the application getting an -EBADF CQ entry when submitting IO.
Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6257478d55e9..0ec74bab8dbe 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -42,6 +42,8 @@ struct io_uring_sqe { * io_uring_setup() flags */ #define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_OP_NOP 0 #define IORING_OP_READV 1 @@ -86,6 +88,11 @@ struct io_sqring_offsets { __u64 resv2; }; +/* + * sq_ring->flags + */ +#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ + struct io_cqring_offsets { __u32 head; __u32 tail; @@ -100,6 +107,7 @@ struct io_cqring_offsets { * io_uring_enter(2) flags */ #define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -108,7 +116,9 @@ struct io_uring_params { __u32 sq_entries; __u32 cq_entries; __u32 flags; - __u32 resv[7]; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 resv[5]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; -- cgit v1.2.3 From f7c917ba11a67632a8452ea99fe132f626a7a2cc Mon Sep 17 00:00:00 2001 From: brakmo Date: Fri, 1 Mar 2019 12:38:46 -0800 Subject: bpf: add bpf helper bpf_skb_ecn_set_ce This patch adds a new bpf helper BPF_FUNC_skb_ecn_set_ce "int bpf_skb_ecn_set_ce(struct sk_buff *skb)". It is added to BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can be attached to the ingress and egress path. The helper is needed because this type of bpf_prog cannot modify the skb directly.
This helper is used to set the ECN field of ECN capable IP packets to ce (congestion encountered) in the IPv6 or IPv4 header of the skb. It can be used by a bpf_prog to manage egress or ingress network bandwidth limit per cgroupv2 by inducing an ECN response in the TCP sender. This works best when using DCTCP. Signed-off-by: Lawrence Brakmo Signed-off-by: Martin KaFai Lau Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e308e90ffea..3c38ac9a92a7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2359,6 +2359,13 @@ union bpf_attr { * Return * A **struct bpf_tcp_sock** pointer on success, or NULL in * case of failure. + * + * int bpf_skb_ecn_set_ce(struct sk_buf *skb) + * Description + * Sets ECN of IP header to ce (congestion encountered) if + * current value is ect (ECN capable). Works with IPv6 and IPv4. + * Return + * 1 if set, 0 if not set. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2457,7 +2464,8 @@ union bpf_attr { FN(spin_lock), \ FN(spin_unlock), \ FN(sk_fullsock), \ - FN(tcp_sock), + FN(tcp_sock), \ + FN(skb_ecn_set_ce), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 0b5c7efdfc6e389ec6840579fe90bdb6f42b08dc Mon Sep 17 00:00:00 2001 From: Kevin Darbyshire-Bryant Date: Fri, 1 Mar 2019 16:04:05 +0100 Subject: sch_cake: Permit use of connmarks as tin classifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add flag 'FWMARK' to enable use of firewall connmarks as tin selector. The connmark (skbuff->mark) needs to be in the range 1->tin_cnt ie. for diffserv3 the mark needs to be 1->3. Background Typically CAKE uses DSCP as the basis for tin selection.
DSCP values are relatively easily changed as part of the egress path, usually with iptables & the mangle table, ingress is more challenging. CAKE is often used on the WAN interface of a residential gateway where passthrough of DSCP from the ISP is either missing or set to unhelpful values thus use of ingress DSCP values for tin selection isn't helpful in that environment. An approach to solving the ingress tin selection problem is to use CAKE's understanding of tc filters. Naive tc filters could match on source/destination port numbers and force tin selection that way, but multiple filters don't scale particularly well as each filter must be traversed whether it matches or not. e.g. a simple example to map 3 firewall marks to tins: MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' ) tc filter add dev $DEV parent $MAJOR protocol all handle 0x01 fw action skbedit priority ${MAJOR}1 tc filter add dev $DEV parent $MAJOR protocol all handle 0x02 fw action skbedit priority ${MAJOR}2 tc filter add dev $DEV parent $MAJOR protocol all handle 0x03 fw action skbedit priority ${MAJOR}3 Another option is to use eBPF cls_act with tc filters e.g. MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' ) tc filter add dev $DEV parent $MAJOR bpf da obj my-bpf-fwmark-to-class.o This has the disadvantages of a) needing someone to write & maintain the bpf program, b) a bpf toolchain to compile it and c) needing to hardcode the major number in the bpf program so it matches the cake instance (or forcing the cake instance to a particular major number) since the major number cannot be passed to the bpf program via tc command line. As already hinted at by the previous examples, it would be helpful to associate tins with something that survives the Internet path and ideally allows tin selection on both egress and ingress. Netfilter's conntrack permits setting an identifying mark on a connection which can also be restored to an ingress packet with tc action connmark e.g. 
tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \ match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb1 Since tc's connmark action has restored any connmark into skb->mark, any of the previous solutions are based upon it and in one form or another copy that mark to the skb->priority field where again CAKE picks this up. This change cuts out at least one of the (less intuitive & non-scalable) middlemen and permit direct access to skb->mark. Signed-off-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 1eb572ef3f27..7ee74c3474bf 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1021,6 +1021,7 @@ enum { TCA_CAKE_INGRESS, TCA_CAKE_ACK_FILTER, TCA_CAKE_SPLIT_GSO, + TCA_CAKE_FWMARK, __TCA_CAKE_MAX }; #define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) -- cgit v1.2.3 From 9036b2fe092a107856edd1a3bad48b83f2b45000 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Fri, 1 Mar 2019 15:31:03 -0800 Subject: net: ipv6: add socket option IPV6_ROUTER_ALERT_ISOLATE By default IPv6 socket with IPV6_ROUTER_ALERT socket option set will receive all IPv6 RA packets from all namespaces. IPV6_ROUTER_ALERT_ISOLATE socket option restricts packets received by the socket to be only from the socket's namespace. Signed-off-by: Maxim Martynov Signed-off-by: Francesco Ruggeri Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/in6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 71d82fe15b03..9f2273a08356 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -178,6 +178,7 @@ struct in6_flowlabel_req { #define IPV6_JOIN_ANYCAST 27 #define IPV6_LEAVE_ANYCAST 28 #define IPV6_MULTICAST_ALL 29 +#define IPV6_ROUTER_ALERT_ISOLATE 30 /* IPV6_MTU_DISCOVER values */ #define IPV6_PMTUDISC_DONT 0 -- cgit v1.2.3 From ca215086b14b89a0e70fc211314944aa6ce50020 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 5 Mar 2019 15:42:23 -0800 Subject: mm: convert PG_balloon to PG_offline PG_balloon was introduced to implement page migration/compaction for pages inflated in virtio-balloon. Nowadays, it is only a marker that a page is part of virtio-balloon and therefore logically offline. We also want to make use of this flag in other balloon drivers - for inflated pages or when onlining a section but keeping some pages offline (e.g. used right now by XEN and Hyper-V via set_online_page_callback()). We are going to expose this flag to dump tools like makedumpfile. But instead of exposing PG_balloon, let's generalize the concept of marking pages as logically offline, so it can be reused for other purposes later on. Rename PG_balloon to PG_offline. This is an indicator that the page is logically offline, the content stale and that it should not be touched (e.g. a hypervisor would have to allocate backing storage in order for the guest to dump an unused page). We can then e.g. exclude such pages from dumps. We replace and reuse KPF_BALLOON (23), as this shouldn't really harm (and for now the semantics stay the same). In following patches, we will make use of this bit also in other balloon drivers. While at it, document PGTABLE. 
[akpm@linux-foundation.org: fix comment text, per David] Link: http://lkml.kernel.org/r/20181119101616.8901-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Konstantin Khlebnikov Acked-by: Michael S. Tsirkin Acked-by: Pankaj gupta Cc: Jonathan Corbet Cc: Alexey Dobriyan Cc: Mike Rapoport Cc: Christian Hansen Cc: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Stephen Rothwell Cc: Matthew Wilcox Cc: Michal Hocko Cc: Pavel Tatashin Cc: Alexander Duyck Cc: Naoya Horiguchi Cc: Miles Chen Cc: David Rientjes Cc: Kazuhito Hagio Cc: Arnd Bergmann Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Juergen Gross Cc: Julien Freche Cc: Kairui Song Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Lianbo Jiang Cc: Michal Hocko Cc: Nadav Amit Cc: Omar Sandoval Cc: Pavel Machek Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Stephen Hemminger Cc: Vitaly Kuznetsov Cc: Xavier Deguillard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/kernel-page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index 21b9113c69da..6f2f2720f3ac 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -32,7 +32,7 @@ #define KPF_KSM 21 #define KPF_THP 22 -#define KPF_BALLOON 23 +#define KPF_OFFLINE 23 #define KPF_ZERO_PAGE 24 #define KPF_IDLE 25 #define KPF_PGTABLE 26 -- cgit v1.2.3 From ab3948f58ff841e51feb845720624665ef5b7ef3 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 5 Mar 2019 15:47:54 -0800 Subject: mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd Android uses ashmem for sharing memory regions. 
We are looking forward to migrating all usecases of ashmem to memfd so that we can possibly remove the ashmem driver in the future from staging while also benefiting from using memfd and contributing to it. Note staging drivers are also not ABI and generally can be removed at anytime. One of the main usecases Android has is the ability to create a region and mmap it as writeable, then add protection against making any "future" writes while keeping the existing already mmap'ed writeable-region active. This allows us to implement a usecase where receivers of the shared memory buffer can get a read-only view, while the sender continues to write to the buffer. See CursorWindow documentation in Android for more details: https://developer.android.com/reference/android/database/CursorWindow This usecase cannot be implemented with the existing F_SEAL_WRITE seal. To support the usecase, this patch adds a new F_SEAL_FUTURE_WRITE seal which prevents any future mmap and write syscalls from succeeding while keeping the existing mmap active. A better way to do F_SEAL_FUTURE_WRITE seal was discussed [1] last week where we don't need to modify core VFS structures to get the same behavior of the seal. This solves several side-effects pointed by Andy. self-tests are provided in later patch to verify the expected semantics. [1] https://lore.kernel.org/lkml/20181111173650.GA256781@google.com/ Thanks a lot to Andy for suggestions to improve code. Link: http://lkml.kernel.org/r/20190112203816.85534-2-joel@joelfernandes.org Signed-off-by: Joel Fernandes (Google) Acked-by: John Stultz Cc: Andy Lutomirski Cc: Minchan Kim Cc: Jann Horn Cc: Al Viro Cc: Andy Lutomirski Cc: Hugh Dickins Cc: J. 
Bruce Fields Cc: Jeff Layton Cc: Marc-André Lureau Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6448cdd9a350..a2f8658f1c55 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ /* (1U << 31) is reserved for signed error codes */ /* -- cgit v1.2.3 From 221c5eb2338232f7340386de1c43decc32682e58 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Jan 2019 09:41:58 -0700 Subject: io_uring: add support for IORING_OP_POLL This is basically a direct port of bfe4037e722e, which implements a one-shot poll command through aio. Description below is based on that commit as well. However, instead of adding a POLL command and relying on io_cancel(2) to remove it, we mimic the epoll(2) interface of having a command to add a poll notification, IORING_OP_POLL_ADD, and one to remove it again, IORING_OP_POLL_REMOVE. To poll for a file descriptor the application should submit an sqe of type IORING_OP_POLL_ADD. It will poll the fd for the events specified in the poll_events field. Unlike poll or epoll without EPOLLONESHOT this interface always works in one shot mode, that is once the sqe is completed, it will have to be resubmitted.
Reviewed-by: Hannes Reinecke Based-on-code-from: Christoph Hellwig Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0ec74bab8dbe..e23408692118 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -25,6 +25,7 @@ struct io_uring_sqe { union { __kernel_rwf_t rw_flags; __u32 fsync_flags; + __u16 poll_events; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -51,6 +52,8 @@ struct io_uring_sqe { #define IORING_OP_FSYNC 3 #define IORING_OP_READ_FIXED 4 #define IORING_OP_WRITE_FIXED 5 +#define IORING_OP_POLL_ADD 6 +#define IORING_OP_POLL_REMOVE 7 /* * sqe->fsync_flags -- cgit v1.2.3 From 54d50897d544c874562253e2a8f70dfcad22afe8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Mar 2019 16:27:14 -0800 Subject: linux/kernel.h: split *_MAX and *_MIN macros into <linux/limits.h> <linux/kernel.h> tends to be cluttered because we often put various sorts of unrelated stuff in it. So, we have split out a sensible chunk of code into a separate header from time to time. This commit splits out the *_MAX and *_MIN defines. The standard header <limits.h> contains various MAX, MIN constants including numerical limits. [1] I think it makes sense to move in-kernel MAX, MIN constants into include/linux/limits.h. We already have include/uapi/linux/limits.h to contain some user-space constants. I changed its include guard to _UAPI_LINUX_LIMITS_H. This change has no impact to the user-space because scripts/headers_install.sh rips off the '_UAPI' prefix from the include guards of exported headers.
[1] http://pubs.opengroup.org/onlinepubs/009604499/basedefs/limits.h.html Link: http://lkml.kernel.org/r/1549156242-20806-2-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Alex Elder Cc: Alexey Dobriyan Cc: Zhang Yanmin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/limits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/limits.h b/include/uapi/linux/limits.h index c3547f07605c..6bcbe3068761 100644 --- a/include/uapi/linux/limits.h +++ b/include/uapi/linux/limits.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_LIMITS_H -#define _LINUX_LIMITS_H +#ifndef _UAPI_LINUX_LIMITS_H +#define _UAPI_LINUX_LIMITS_H #define NR_OPEN 1024 -- cgit v1.2.3 From 60d6d04ca3abb34d5e89f030dbea440d9715a168 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 7 Mar 2019 16:29:09 -0800 Subject: autofs: add ignore mount option Add an autofs file system mount option that can be used to provide a generic indicator to applications that the mount entry should be ignored when displaying mount information. In other OSes that provide autofs and that provide a mount list to user space based on the kernel mount list a no-op mount option ("ignore" is the one use on the most common OS) is allowed so that autofs file system users can optionally use it. The idea is that it be used by user space programs to exclude autofs mounts from consideration when reading the mounts list. Prior to the change to link /etc/mtab to /proc/self/mounts all I needed to do to achieve this was to use mount(2) and not update the mtab but now that no longer works. I know the symlinking happened a long time ago and I considered doing this then but, at the time I couldn't remember the commonly used option name and thought persuading the various utility maintainers would be too hard. 
But now I have a RHEL request to do this for compatibility for a widely used product so I want to go ahead with it and try and enlist the help of some utility package maintainers. Clearly, without the option nothing can be done so it's at least a start. Link: http://lkml.kernel.org/r/154725123970.11260.6113771566924907275.stgit@pluto-themaw-net Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 082119630b49..1f7925afad2d 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 4 +#define AUTOFS_PROTO_SUBVERSION 5 /* * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed -- cgit v1.2.3 From 6eb3c3d0a52dca337e327ae8868ca1f44a712e02 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 Mar 2019 16:29:26 -0800 Subject: exec: increase BINPRM_BUF_SIZE to 256 Large enterprise clients often run applications out of networked file systems where the IT mandated layout of project volumes can end up leading to paths that are longer than 128 characters. Bumping this up to the next order of two solves this problem in all but the most egregious case while still fitting into a 512b slab. [oleg@redhat.com: update comment, per Kees] Link: http://lkml.kernel.org/r/20181112160956.GA28472@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Ben Woodard Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Kees Cook Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/binfmts.h b/include/uapi/linux/binfmts.h index 4abad03a8853..689025d9c185 100644 --- a/include/uapi/linux/binfmts.h +++ b/include/uapi/linux/binfmts.h @@ -16,6 +16,6 @@ struct pt_regs; #define MAX_ARG_STRINGS 0x7FFFFFFF /* sizeof(linux_binprm->buf) */ -#define BINPRM_BUF_SIZE 128 +#define BINPRM_BUF_SIZE 256 #endif /* _UAPI_LINUX_BINFMTS_H */ -- cgit v1.2.3 From dbafd7ddd62369b2f3926ab847cbf8fc40e800b7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:04 -0700 Subject: bpf: Add bpf_get_listener_sock(struct bpf_sock *sk) helper Add a new helper "struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)" which returns a bpf_sock in TCP_LISTEN state. It will trace back to the listener sk from a request_sock if possible. It returns NULL for all other cases. No reference is taken because the helper ensures the sk is in SOCK_RCU_FREE (where the TCP_LISTEN sock should be in). Hence, bpf_sk_release() is unnecessary and the verifier does not allow bpf_sk_release(listen_sk) to be called either. The following is also allowed because the bpf_prog is run under rcu_read_lock(): sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ listen_sk = bpf_get_listener_sock(sk); /* if (!listen_sk) { ... } */ bpf_sk_release(sk); src_port = listen_sk->src_port; /* Allowed */ Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c38ac9a92a7..983b25cb608d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2366,6 +2366,14 @@ union bpf_attr { * current value is ect (ECN capable). Works with IPv6 and IPv4. 
* Return * 1 if set, 0 if not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in TCP_LISTEN state. + * bpf_sk_release() is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2473,8 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 62369db2df8d1edfa040878203b446e023a16802 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:39 +0000 Subject: bpf: fix documentation for eBPF helpers Another round of minor fixes for the documentation of the BPF helpers located in the UAPI bpf.h header file. Changes include: - Moving around description of some helpers, to keep the descriptions in the same order as helpers are declared (bpf_map_push_elem(), leftover from commit 90b1023f68c7 ("bpf: fix documentation for eBPF helpers"), bpf_rc_keydown(), and bpf_skb_ancestor_cgroup_id()). - Fixing typos ("contex" -> "context"). - Harmonising return types ("void* " -> "void *", "uint64_t" -> "u64"). - Addition of the "bpf_" prefix to bpf_get_storage(). - Light additions of RST markup on some keywords. - Empty line deletion between description and return value for bpf_tcp_sock(). - Edit for the description for bpf_skb_ecn_set_ce() (capital letters, acronym expansion, no effect if ECT not set, more details on return value). 
Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 128 ++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 63 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 983b25cb608d..4465d00d3493 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -502,16 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1425,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. * @@ -2098,52 +2088,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. 
This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2149,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. 
- * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2181,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2279,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. 
+ * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2346,33 +2346,35 @@ union bpf_attr { * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. * * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. * * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) * Description - * Return a **struct bpf_sock** pointer in TCP_LISTEN state. - * bpf_sk_release() is unnecessary and not allowed. + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ -- cgit v1.2.3 From 0eb0978528d47699edd091dc2c337952ad8da436 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:40 +0000 Subject: bpf: add documentation for helpers bpf_spin_lock(), bpf_spin_unlock() Add documentation for the BPF spinlock-related helpers to the doc in bpf.h. I added the constraints and restrictions coming with the use of spinlocks for BPF: not all of it is directly related to the use of the helper, but I thought it would be nice for users to find them in the man page. This list of restrictions is nearly a verbatim copy of the list in Alexei's commit log for those helpers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4465d00d3493..929c8e537a14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2343,6 +2343,61 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. 
+ * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). 
+ * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such -- cgit v1.2.3 From 07ba9e7be423423043c5090a2f395c0da26e1b3d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 11:18:17 -0800 Subject: Input: document meanings of KEY_SCREEN and KEY_ZOOM It is hard to say what KEY_SCREEN and KEY_ZOOM mean, but historically DVB folks have used them to indicate switch to full screen mode. Later, they converged on using KEY_ZOOM to switch into full screen mode and KEY_SCREEN to control aspect ratio (see Documentation/media/uapi/rc/rc-tables.rst). Let's commit to these uses, and define: - KEY_FULL_SCREEN (and make KEY_ZOOM its alias) - KEY_ASPECT_RATIO (and make KEY_SCREEN its alias) Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ae366b87426a..bc5054e51bef 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -439,10 +439,12 @@ #define KEY_TITLE 0x171 #define KEY_SUBTITLE 0x172 #define KEY_ANGLE 0x173 -#define KEY_ZOOM 0x174 +#define KEY_FULL_SCREEN 0x174 /* AC View Toggle */ +#define KEY_ZOOM KEY_FULL_SCREEN #define KEY_MODE 0x175 #define KEY_KEYBOARD 0x176 -#define KEY_SCREEN 0x177 +#define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */ +#define KEY_SCREEN KEY_ASPECT_RATIO #define KEY_PC 0x178 /* Media Select Computer */ #define KEY_TV 0x179 /* Media Select TV */ #define KEY_TV2 0x17a /* Media Select Cable */ -- cgit v1.2.3 From 0532a1b0d045115521a93acf28f1270df89ad806 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Mar 2019 09:19:34 +0100 Subject: virt: vbox: Implement passing requestor info to the host for VirtualBox 6.0.x VirtualBox 6.0.x has a new feature where the guest kernel driver passes info
about the origin of the request (e.g. userspace or kernelspace) to the hypervisor. If we do not pass this information then when running the 6.0.x userspace guest-additions tools on a 6.0.x host, some requests will get denied with a VERR_VERSION_MISMATCH error, breaking vboxservice.service and the mounting of shared folders marked to be auto-mounted. This commit implements passing the requestor info to the host, fixing this. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vbox_vmmdev_types.h | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index 0e68024f36c7..26f39816af14 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -102,6 +102,66 @@ enum vmmdev_request_type { #define VMMDEVREQ_HGCM_CALL VMMDEVREQ_HGCM_CALL32 #endif +/* vmmdev_request_header.requestor defines */ + +/* Requestor user not given. */ +#define VMMDEV_REQUESTOR_USR_NOT_GIVEN 0x00000000 +/* The kernel driver (vboxguest) is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV 0x00000001 +/* Some other kernel driver is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV_OTHER 0x00000002 +/* The root or a admin user is the requestor. */ +#define VMMDEV_REQUESTOR_USR_ROOT 0x00000003 +/* Regular joe user is making the request. */ +#define VMMDEV_REQUESTOR_USR_USER 0x00000006 +/* User classification mask. */ +#define VMMDEV_REQUESTOR_USR_MASK 0x00000007 + +/* Kernel mode request. Note this is 0, check for !USERMODE instead. */ +#define VMMDEV_REQUESTOR_KERNEL 0x00000000 +/* User mode request. */ +#define VMMDEV_REQUESTOR_USERMODE 0x00000008 +/* User or kernel mode classification mask. */ +#define VMMDEV_REQUESTOR_MODE_MASK 0x00000008 + +/* Don't know the physical console association of the requestor. 
*/ +#define VMMDEV_REQUESTOR_CON_DONT_KNOW 0x00000000 +/* + * The request originates with a process that is NOT associated with the + * physical console. + */ +#define VMMDEV_REQUESTOR_CON_NO 0x00000010 +/* Requestor process is associated with the physical console. */ +#define VMMDEV_REQUESTOR_CON_YES 0x00000020 +/* Console classification mask. */ +#define VMMDEV_REQUESTOR_CON_MASK 0x00000030 + +/* Requestor is member of special VirtualBox user group. */ +#define VMMDEV_REQUESTOR_GRP_VBOX 0x00000080 + +/* Note: trust level is for windows guests only, linux always uses not-given */ +/* Requestor trust level: Unspecified */ +#define VMMDEV_REQUESTOR_TRUST_NOT_GIVEN 0x00000000 +/* Requestor trust level: Untrusted (SID S-1-16-0) */ +#define VMMDEV_REQUESTOR_TRUST_UNTRUSTED 0x00001000 +/* Requestor trust level: Untrusted (SID S-1-16-4096) */ +#define VMMDEV_REQUESTOR_TRUST_LOW 0x00002000 +/* Requestor trust level: Medium (SID S-1-16-8192) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM 0x00003000 +/* Requestor trust level: Medium plus (SID S-1-16-8448) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM_PLUS 0x00004000 +/* Requestor trust level: High (SID S-1-16-12288) */ +#define VMMDEV_REQUESTOR_TRUST_HIGH 0x00005000 +/* Requestor trust level: System (SID S-1-16-16384) */ +#define VMMDEV_REQUESTOR_TRUST_SYSTEM 0x00006000 +/* Requestor trust level >= Protected (SID S-1-16-20480, S-1-16-28672) */ +#define VMMDEV_REQUESTOR_TRUST_PROTECTED 0x00007000 +/* Requestor trust level mask */ +#define VMMDEV_REQUESTOR_TRUST_MASK 0x00007000 + +/* Requestor is using the less trusted user device node (/dev/vboxuser) */ +#define VMMDEV_REQUESTOR_USER_DEVICE 0x00008000 + /** HGCM service location types. 
*/ enum vmmdev_hgcm_service_location_type { VMMDEV_HGCM_LOC_INVALID = 0, -- cgit v1.2.3 From 3d9683cf3bfb6d4e4605a153958dfca7e18b52f2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Mar 2019 18:08:12 +0900 Subject: KVM: export <linux/kvm_para.h> and <asm/kvm_para.h> iif KVM is supported I do not see any consistency about headers_install of <linux/kvm_para.h> and <asm/kvm_para.h>. According to my analysis of Linux 5.1-rc1, there are 3 groups: [1] Both <linux/kvm_para.h> and <asm/kvm_para.h> are exported alpha, arm, hexagon, mips, powerpc, s390, sparc, x86 [2] <asm/kvm_para.h> is exported, but <linux/kvm_para.h> is not arc, arm64, c6x, h8300, ia64, m68k, microblaze, nios2, openrisc, parisc, sh, unicore32, xtensa [3] Neither <linux/kvm_para.h> nor <asm/kvm_para.h> is exported csky, nds32, riscv This does not match to the actual KVM support. At least, [2] is half-baked. Nor do arch maintainers look like they care about this. For example, commit 0add53713b1c ("microblaze: Add missing kvm_para.h to Kbuild") exported <asm/kvm_para.h> to user-space in order to fix an in-kernel build error. We have two ways to make this consistent: [A] export both <linux/kvm_para.h> and <asm/kvm_para.h> for all architectures, irrespective of the KVM support [B] Match the header export of <linux/kvm_para.h> and <asm/kvm_para.h> to the KVM support My first attempt was [A] because the code looks cleaner, but Paolo suggested [B]. So, this commit goes with [B]. For most architectures, <asm/kvm_para.h> was moved to the kernel-space. I changed include/uapi/linux/Kbuild so that it checks generated asm/kvm_para.h as well as check-in ones.
After this commit, there will be two groups: [1] Both <linux/kvm_para.h> and <asm/kvm_para.h> are exported arm, arm64, mips, powerpc, s390, x86 [2] Neither <linux/kvm_para.h> nor <asm/kvm_para.h> is exported alpha, arc, c6x, csky, h8300, hexagon, ia64, m68k, microblaze, nds32, nios2, openrisc, parisc, riscv, sh, sparc, unicore32, xtensa Signed-off-by: Masahiro Yamada Acked-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/uapi/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5f24b50c9e88..059dc2bedaf6 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -7,5 +7,7 @@ no-export-headers += kvm.h endif ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),) +ifeq ($(wildcard $(objtree)/arch/$(SRCARCH)/include/generated/uapi/asm/kvm_para.h),) no-export-headers += kvm_para.h endif +endif -- cgit v1.2.3 From afe64245af9f58267e7fa8fb76ad5650ee7ec25f Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:45 -0400 Subject: ethtool: avoid signed-unsigned comparison in ethtool_validate_speed() When building C++ userspace code that includes ethtool.h with "-Werror -Wall", g++ complains about signed-unsigned comparison in ethtool_validate_speed() due to definition of SPEED_UNKNOWN as -1. Explicitly cast SPEED_UNKNOWN to __u32 to match type of ethtool_validate_speed() argument. Signed-off-by: Michael Zhivich Signed-off-by: David S.
Miller --- include/uapi/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3652b239dad1..d473e5ed044c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1591,7 +1591,7 @@ enum ethtool_link_mode_bit_indices { static inline int ethtool_validate_speed(__u32 speed) { - return speed <= INT_MAX || speed == SPEED_UNKNOWN; + return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN; } /* Duplex, half or full. */ -- cgit v1.2.3