From 23c42a403a9cfdbad6004a556c927be7dd61a8ee Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Oct 2018 15:07:40 +0200 Subject: netfilter: ipset: Introduction of new commands and protocol version 7 Two new commands (IPSET_CMD_GET_BYNAME, IPSET_CMD_GET_BYINDEX) are introduced. The new commands makes possible to eliminate the getsockopt operation (in iptables set/SET match/target) and thus use only netlink communication between userspace and kernel for ipset. With the new protocol version, userspace can exactly know which functionality is supported by the running kernel. Both the kernel and userspace is fully backward compatible. Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 60236f694143..ea69ca21ff23 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -13,8 +13,9 @@ #include -/* The protocol version */ -#define IPSET_PROTOCOL 6 +/* The protocol versions */ +#define IPSET_PROTOCOL 7 +#define IPSET_PROTOCOL_MIN 6 /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -38,17 +39,19 @@ enum ipset_cmd { IPSET_CMD_TEST, /* 11: Test an element in a set */ IPSET_CMD_HEADER, /* 12: Get set header data only */ IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */ + IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */ IPSET_MSG_MAX, /* Netlink message commands */ /* Commands in userspace: */ - IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ - IPSET_CMD_HELP, /* 15: Get help */ - IPSET_CMD_VERSION, /* 16: Get program version */ - IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */ + IPSET_CMD_HELP, /* 17: Get help */ + IPSET_CMD_VERSION, /* 18: Get program version */ + IPSET_CMD_QUIT, /* 19: Quit from interactive mode */ IPSET_CMD_MAX, - IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */ }; /* Attributes at command level */ @@ -66,6 +69,7 @@ enum { IPSET_ATTR_LINENO, /* 9: Restore lineno */ IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + IPSET_ATTR_INDEX, /* 11: Kernel index of set */ __IPSET_ATTR_CMD_MAX, }; #define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) @@ -223,6 +227,7 @@ enum ipset_adt { /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t * and IPSET_INVALID_ID if you want to increase the max number of sets. + * Also, IPSET_ATTR_INDEX must be changed. */ typedef __u16 ip_set_id_t; -- cgit v1.2.3 From e20cf8d3f1f763ad28a9cb3b41305b8a8a42653e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:29 +0100 Subject: udp: implement GRO for plain UDP sockets. This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT"). When UDP_GRO is enabled, such socket is also eligible for GRO in the rx path: UDP segments directed to such socket are assembled into a larger GSO_UDP_L4 packet. The core UDP GRO support is enabled with setsockopt(UDP_GRO). Initial benchmark numbers: Before: udp rx: 1079 MB/s 769065 calls/s After: udp rx: 1466 MB/s 24877 calls/s This change introduces a side effect in respect to UDP tunnels: after a UDP tunnel creation, now the kernel performs a lookup per ingress UDP packet, while before such lookup happened only if the ingress packet carried a valid internal header csum. rfc v2 -> rfc v3: - fixed typos in macro name and comments - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1 - acquire socket lock in UDP_GRO setsockopt rfc v1 -> rfc v2: - use a new option to enable UDP GRO - use static keys to protect the UDP GRO socket lookup Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09502de447f5..30baccb6c9c4 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -33,6 +33,7 @@ struct udphdr { #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ +#define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -- cgit v1.2.3 From b4d3069783bccf0c965468da7db141d359d796fc Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:16 +0100 Subject: vxlan: Allow configuration of DF behaviour Allow users to set the IPv4 DF bit in outgoing packets, or to inherit its value from the IPv4 inner header. If the encapsulated protocol is IPv6 and DF is configured to be inherited, always set it. For IPv4, inheriting DF from the inner header was probably intended from the very beginning judging by the comment to vxlan_xmit(), but it wasn't actually implemented -- also because it would have done more harm than good, without handling for ICMP Fragmentation Needed messages. According to RFC 7348, "Path MTU discovery MAY be used". An expired RFC draft, draft-saum-nvo3-pmtud-over-vxlan-05, whose purpose was to describe PMTUD implementation, says that "is a MUST that Vxlan gateways [...] SHOULD set the DF-bit [...]", whatever that means. Given this background, the only sane option is probably to let the user decide, and keep the current behaviour as default. This only applies to non-lwt tunnels: if an external control plane is used, tunnel key will still control the DF flag. v2: - DF behaviour configuration only applies for non-lwt tunnels, move DF setting to if (!info) block in vxlan_xmit_one() (Stephen Hemminger) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1debfa42cba1..efc588949431 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -533,6 +533,7 @@ enum { IFLA_VXLAN_LABEL, IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, + IFLA_VXLAN_DF, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -542,6 +543,14 @@ struct ifla_vxlan_port_range { __be16 high; }; +enum ifla_vxlan_df { + VXLAN_DF_UNSET = 0, + VXLAN_DF_SET, + VXLAN_DF_INHERIT, + __VXLAN_DF_END, + VXLAN_DF_MAX = __VXLAN_DF_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, -- cgit v1.2.3 From a025fb5f49ad38cf749753b16fcd031d0d678f2b Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:19 +0100 Subject: geneve: Allow configuration of DF behaviour draft-ietf-nvo3-geneve-08 says: It is strongly RECOMMENDED that Path MTU Discovery ([RFC1191], [RFC1981]) be used by setting the DF bit in the IP header when Geneve packets are transmitted over IPv4 (this is the default with IPv6). Now that ICMP error handling is working for GENEVE, we can comply with this recommendation. Make this configurable, though, to avoid breaking existing setups. By default, DF won't be set. It can be set or inherited from inner IPv4 packets. If it's configured to be inherited and we are encapsulating IPv6, it will be set. This only applies to non-lwt tunnels: if an external control plane is used, tunnel key will still control the DF flag. v2: - DF behaviour configuration only applies for non-lwt tunnels, apply DF setting only if (!geneve->collect_md) in geneve_xmit_skb() (Stephen Hemminger) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index efc588949431..f42c069d81db 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -566,10 +566,19 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, + IFLA_GENEVE_DF, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) +enum ifla_geneve_df { + GENEVE_DF_UNSET = 0, + GENEVE_DF_SET, + GENEVE_DF_INHERIT, + __GENEVE_DF_END, + GENEVE_DF_MAX = __GENEVE_DF_END - 1, +}; + /* PPP section */ enum { IFLA_PPP_UNSPEC, -- cgit v1.2.3 From c8123ead13a5c92dc5fd15c0fdfe88eef41e6ac1 Mon Sep 17 00:00:00 2001 From: Nitin Hande Date: Sun, 28 Oct 2018 21:02:45 -0700 Subject: bpf: Extend the sk_lookup() helper to XDP hookpoint. This patch proposes to extend the sk_lookup() BPF API to the XDP hookpoint. The sk_lookup() helper supports a lookup on incoming packet to find the corresponding socket that will receive this packet. Current support for this BPF API is at the tc hookpoint. This patch will extend this API at XDP hookpoint. A XDP program can map the incoming packet to the 5-tuple parameter and invoke the API to find the corresponding socket structure. Signed-off-by: Nitin Hande Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 852dc17ab47a..47d606d744cc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2201,6 +2201,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description @@ -2233,6 +2235,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description -- cgit v1.2.3 From 9bb7e0f24e7e7d00daa1219b14539e2e602649b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 10 Sep 2018 13:29:12 +0200 Subject: cfg80211: add peer measurement with FTM initiator API Add a new "peer measurement" API, that can be used to measure certain things related to a peer. Right now, only implement FTM (flight time measurement) over it, but the idea is that it'll be extensible to also support measuring the necessary things to calculate e.g. angle-of-arrival for WiGig. The API is structured to have a generic list of peers and channels to measure with/on, and then for each of those a set of measurements (again, only FTM right now) to perform. Results are sent to the requesting socket, including a final complete message. Closing the controlling netlink socket will abort a running measurement. v3: - add a bit to report "final" for partial results - remove list keeping etc. and just unicast out the results to the requester (big code reduction ...) - also send complete message unicast, and as a result remove the multicast group - separate out struct cfg80211_pmsr_ftm_request_peer from struct cfg80211_pmsr_request_peer - document timeout == 0 if no timeout - disallow setting timeout nl80211 attribute to 0, must not include attribute for no timeout - make MAC address randomization optional - change num bursts exponent default to 0 (1 burst, rather rather than the old default of 15==don't care) v4: - clarify NL80211_ATTR_TIMEOUT documentation v5: - remove unnecessary nl80211 multicast/family changes - remove partial results bit/flag, final is sufficient - add max_bursts_exponent, max_ftms_per_burst to capability - rename "frames per burst" -> "FTMs per burst" v6: - rename cfg80211_pmsr_free_wdev() to cfg80211_pmsr_wdev_down() and call it in leave, so the device can't go down with any pending measurements v7: - wording fixes (Lior) - fix ftm.max_bursts_exponent to allow having the limit of 0 (Lior) v8: - copyright statements - minor coding style fixes - fix error path leak Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 418 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 418 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6d610bae30a9..e45b88925783 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1036,6 +1036,30 @@ * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute. * + * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s) + * with the given parameters, which are encapsulated in the nested + * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address + * randomization may be enabled and configured by specifying the + * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. + * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. + * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in + * the netlink extended ack message. + * + * To cancel a measurement, close the socket that requested it. + * + * Measurement results are reported to the socket that requested the + * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they + * become available, so applications must ensure a large enough socket + * buffer size. + * + * Depending on driver support it may or may not be possible to start + * multiple concurrent measurements. + * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the + * result notification from the driver to the requesting socket. + * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that + * the measurement completed, using the measurement cookie + * (%NL80211_ATTR_COOKIE). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1250,6 +1274,10 @@ enum nl80211_commands { NL80211_CMD_GET_FTM_RESPONDER_STATS, + NL80211_CMD_PEER_MEASUREMENT_START, + NL80211_CMD_PEER_MEASUREMENT_RESULT, + NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2254,6 +2282,16 @@ enum nl80211_commands { * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder * statistics, see &enum nl80211_ftm_responder_stats. * + * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32), + * if the attribute is not given no timeout is requested. Note that 0 is an + * invalid value. + * + * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result) + * data, uses nested attributes specified in + * &enum nl80211_peer_measurement_attrs. + * This is also used for capability advertisement in the wiphy information, + * with the appropriate sub-attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2699,6 +2737,10 @@ enum nl80211_attrs { NL80211_ATTR_FTM_RESPONDER_STATS, + NL80211_ATTR_TIMEOUT, + + NL80211_ATTR_PEER_MEASUREMENTS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5906,4 +5948,380 @@ enum nl80211_ftm_responder_stats { NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1 }; +/** + * enum nl80211_preamble - frame preamble types + * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble + * @NL80211_PREAMBLE_HT: HT preamble + * @NL80211_PREAMBLE_VHT: VHT preamble + * @NL80211_PREAMBLE_DMG: DMG preamble + */ +enum nl80211_preamble { + NL80211_PREAMBLE_LEGACY, + NL80211_PREAMBLE_HT, + NL80211_PREAMBLE_VHT, + NL80211_PREAMBLE_DMG, +}; + +/** + * enum nl80211_peer_measurement_type - peer measurement types + * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use + * these numbers also for attributes + * + * @NL80211_PMSR_TYPE_FTM: flight time measurement + * + * @NUM_NL80211_PMSR_TYPES: internal + * @NL80211_PMSR_TYPE_MAX: highest type number + */ +enum nl80211_peer_measurement_type { + NL80211_PMSR_TYPE_INVALID, + + NL80211_PMSR_TYPE_FTM, + + NUM_NL80211_PMSR_TYPES, + NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1 +}; + +/** + * enum nl80211_peer_measurement_status - peer measurement status + * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully + * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused + * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out + * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent + * reason may be available in the response data + */ +enum nl80211_peer_measurement_status { + NL80211_PMSR_STATUS_SUCCESS, + NL80211_PMSR_STATUS_REFUSED, + NL80211_PMSR_STATUS_TIMEOUT, + NL80211_PMSR_STATUS_FAILURE, +}; + +/** + * enum nl80211_peer_measurement_req - peer measurement request attributes + * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement + * type-specific request data inside. The attributes used are from the + * enums named nl80211_peer_measurement__req. + * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported + * (flag attribute) + * + * @NUM_NL80211_PMSR_REQ_ATTRS: internal + * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_req { + __NL80211_PMSR_REQ_ATTR_INVALID, + + NL80211_PMSR_REQ_ATTR_DATA, + NL80211_PMSR_REQ_ATTR_GET_AP_TSF, + + /* keep last */ + NUM_NL80211_PMSR_REQ_ATTRS, + NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_resp - peer measurement response attributes + * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement + * type-specific results inside. The attributes used are from the enums + * named nl80211_peer_measurement__resp. + * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status + * (using values from &enum nl80211_peer_measurement_status.) + * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the + * result was measured; this value is not expected to be accurate to + * more than 20ms. (u64, nanoseconds) + * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface + * doing the measurement is connected to when the result was measured. + * This shall be accurately reported if supported and requested + * (u64, usec) + * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially + * (*e.g. with FTM per-burst data) this flag will be cleared on all but + * the last result; if all results are combined it's set on the single + * result. + * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore + * + * @NUM_NL80211_PMSR_RESP_ATTRS: internal + * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_resp { + __NL80211_PMSR_RESP_ATTR_INVALID, + + NL80211_PMSR_RESP_ATTR_DATA, + NL80211_PMSR_RESP_ATTR_STATUS, + NL80211_PMSR_RESP_ATTR_HOST_TIME, + NL80211_PMSR_RESP_ATTR_AP_TSF, + NL80211_PMSR_RESP_ATTR_FINAL, + NL80211_PMSR_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_RESP_ATTRS, + NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement + * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid + * + * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address + * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level + * attributes like %NL80211_ATTR_WIPHY_FREQ etc. + * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_req inside. + * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_resp inside. + * + * @NUM_NL80211_PMSR_PEER_ATTRS: internal + * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_peer_attrs { + __NL80211_PMSR_PEER_ATTR_INVALID, + + NL80211_PMSR_PEER_ATTR_ADDR, + NL80211_PMSR_PEER_ATTR_CHAN, + NL80211_PMSR_PEER_ATTR_REQ, + NL80211_PMSR_PEER_ATTR_RESP, + + /* keep last */ + NUM_NL80211_PMSR_PEER_ATTRS, + NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1, +}; + +/** + * enum nl80211_peer_measurement_attrs - peer measurement attributes + * @__NL80211_PMSR_ATTR_INVALID: invalid + * + * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability + * advertisement only, indicates the maximum number of peers + * measurements can be done with in a single request + * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability + * indicating that the connected AP's TSF can be reported in + * measurement results + * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability + * indicating that MAC address randomization is supported. + * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device, + * this contains a nesting indexed by measurement type, and + * type-specific capabilities inside, which are from the enums + * named nl80211_peer_measurement__capa. + * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is + * meaningless, just a list of peers to measure with, with the + * sub-attributes taken from + * &enum nl80211_peer_measurement_peer_attrs. + * + * @NUM_NL80211_PMSR_ATTR: internal + * @NL80211_PMSR_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_attrs { + __NL80211_PMSR_ATTR_INVALID, + + NL80211_PMSR_ATTR_MAX_PEERS, + NL80211_PMSR_ATTR_REPORT_AP_TSF, + NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR, + NL80211_PMSR_ATTR_TYPE_CAPA, + NL80211_PMSR_ATTR_PEERS, + + /* keep last */ + NUM_NL80211_PMSR_ATTR, + NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_capa - FTM capabilities + * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode + * is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP + * mode is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI + * data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic + * location data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits + * from &enum nl80211_preamble. + * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from + * &enum nl80211_chan_width indicating the supported channel + * bandwidths for FTM. Note that a higher channel bandwidth may be + * configured to allow for other measurements types with different + * bandwidth requirement in the same measurement. + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating + * the maximum bursts exponent that can be used (if not present anything + * is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating + * the maximum FTMs per burst (if not present anything is valid) + * + * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_capa { + __NL80211_PMSR_FTM_CAPA_ATTR_INVALID, + + NL80211_PMSR_FTM_CAPA_ATTR_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC, + NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, + NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, + + /* keep last */ + NUM_NL80211_PMSR_FTM_CAPA_ATTR, + NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_req - FTM request attributes + * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see + * &enum nl80211_preamble), optional for DMG (u32) + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in + * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element" + * (u8, 0-15, optional with default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units + * of 100ms (u16, optional with default 0) + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016 + * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with + * default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames + * requested per burst + * (u8, 0-31, optional with default 0 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries + * (u8, default 3) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data + * (flag) + * + * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal + * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_req { + __NL80211_PMSR_FTM_REQ_ATTR_INVALID, + + NL80211_PMSR_FTM_REQ_ATTR_ASAP, + NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE, + NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD, + NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC, + + /* keep last */ + NUM_NL80211_PMSR_FTM_REQ_ATTR, + NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons + * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used + * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder + * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement + * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is + * on a different channel, so can't measure (if we didn't know, we'd + * try and get no response) + * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM + * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps + * received + * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry + * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME) + * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed + * by the peer and are no longer supported + */ +enum nl80211_peer_measurement_ftm_failure_reasons { + NL80211_PMSR_FTM_FAILURE_UNSPECIFIED, + NL80211_PMSR_FTM_FAILURE_NO_RESPONSE, + NL80211_PMSR_FTM_FAILURE_REJECTED, + NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL, + NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE, + NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP, + NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS, +}; + +/** + * enum nl80211_peer_measurement_ftm_resp - FTM response attributes + * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason + * (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported + * as separate results then it will be the burst index 0...(N-1) and + * the top level will indicate partial results (u32) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames + * transmitted (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames + * that were acknowleged (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the + * busy peer (u32, seconds) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent + * used by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by + * the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used + * by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the + * FTM action frame (optional, nested, using &enum nl80211_rate_info + * attributes) + * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM + * action frame (optional, nested, using &enum nl80211_rate_info attrs) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that + * standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds, + * optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note + * that standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer + * (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only + * + * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal + * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_resp { + __NL80211_PMSR_FTM_RESP_ATTR_INVALID, + + NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON, + NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES, + NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME, + NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_TX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG, + NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_LCI, + NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC, + NL80211_PMSR_FTM_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_FTM_RESP_ATTR, + NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1 +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From dbdaee7aa6e61f56aac61b71a7807e76f92cc895 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 15:48:53 -0400 Subject: {nl,mac}80211: report gate connectivity in station info Capture the current state of gate connectivity from the mesh formation field in mesh config whenever we receive a beacon, and report that via GET_STATION. This allows applications doing mesh peering in userspace to make peering decisions based on peers' current upstream connectivity. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e45b88925783..ff6005edf32f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3116,6 +3116,8 @@ enum nl80211_sta_bss_param { * with an FCS error (u32, from this station). This count may not include * some packets with an FCS error due to TA corruption. Hence this counter * might not be fully accurate. + * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a + * mesh gate * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3158,6 +3160,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_ACK_SIGNAL_AVG, NL80211_STA_INFO_RX_MPDUS, NL80211_STA_INFO_FCS_ERROR_COUNT, + NL80211_STA_INFO_CONNECTED_TO_GATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 01d66fbd5b18ac9f01a6a2ae1278189d19208ad5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 17:36:34 -0400 Subject: {nl,mac}80211: add dot11MeshConnectedToMeshGate to meshconf When userspace is controlling mesh routing, it may have better knowledge about whether a mesh STA is connected to a mesh gate than the kernel mpath table. Add dot11MeshConnectedToMeshGate to the mesh config so that such applications can explicitly signal that a mesh STA is connected to a gate, which will then be advertised in the beacon. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ff6005edf32f..51bd85b7d839 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3117,7 +3117,7 @@ enum nl80211_sta_bss_param { * some packets with an FCS error due to TA corruption. Hence this counter * might not be fully accurate. * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a - * mesh gate + * mesh gate (u8, 0 or 1) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3940,6 +3940,11 @@ enum nl80211_mesh_power_mode { * remove it from the STA's list of peers. You may set this to 0 to disable * the removal of the STA. Default is 30 minutes. * + * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA + * will advertise that it is connected to a gate in the mesh formation + * field. If left unset then the mesh formation field will only + * advertise such if there is an active root mesh path. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -3972,6 +3977,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_POWER_MODE, NL80211_MESHCONF_AWAKE_WINDOW, NL80211_MESHCONF_PLINK_TIMEOUT, + NL80211_MESHCONF_CONNECTED_TO_GATE, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 361800876f80da3915c46e388fc682532228b2c3 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Nov 2018 11:14:44 +0100 Subject: ptp: add PTP_SYS_OFFSET_EXTENDED ioctl The PTP_SYS_OFFSET ioctl, which can be used to measure the offset between a PHC and the system clock, includes the total time that the driver needs to read the PHC timestamp. This typically involves reading of multiple PCI registers (sometimes in multiple iterations) and the register that contains the lowest bits of the timestamp is not read in the middle between the two readings of the system clock. This asymmetry causes the measured offset to have a significant error. Introduce a new ioctl, driver function, and helper functions, which allow the reading of the lowest register to be isolated from the other readings in order to reduce the asymmetry. The ioctl returns three timestamps for each measurement: - system time right before reading the lowest bits of the PHC timestamp - PHC time - system time immediately after reading the lowest bits of the PHC timestamp Cc: Richard Cochran Cc: Jacob Keller Cc: Marcelo Tosatti Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 3039bf6a742e..d73d83950265 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -84,6 +84,16 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +struct ptp_sys_offset_extended { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of [system, phc, system] time stamps. The kernel will provide + * 3*n_samples time stamps. + */ + struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; +}; + struct ptp_sys_offset_precise { struct ptp_clock_time device; struct ptp_clock_time sys_realtime; @@ -136,6 +146,8 @@ struct ptp_pin_desc { #define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) #define PTP_SYS_OFFSET_PRECISE \ _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) +#define PTP_SYS_OFFSET_EXTENDED \ + _IOW(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ -- cgit v1.2.3 From 48872c11b77271ef9b070bdc50afe6655c4eb9aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Nov 2018 09:11:31 -0800 Subject: net_sched: sch_fq: add dctcp-like marking Similar to 80ba92fa1a92 ("codel: add ce_threshold attribute") After EDT adoption, it became easier to implement DCTCP-like CE marking. In many cases, queues are not building in the network fabric but on the hosts themselves. If packets leaving fq missed their Earliest Departure Time by XXX usec, we mark them with ECN CE. This gives a feedback (after one RTT) to the sender to slow down and find better operating mode. Example : tc qd replace dev eth0 root fq ce_threshold 2.5ms Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 89ee47c2f17d..ee017bc057a3 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -864,6 +864,8 @@ enum { TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + __TCA_FQ_MAX }; @@ -882,6 +884,7 @@ struct tc_fq_qd_stats { __u32 inactive_flows; __u32 throttled_flows; __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ }; /* Heavy-Hitter Filter */ -- cgit v1.2.3 From 5c72299fba9df407c2f2994e194edebf878996ee Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Mon, 12 Nov 2018 16:15:55 -0800 Subject: net: sched: cls_flower: Classify packets using port ranges Added support in tc flower for filtering based on port ranges. Example: 1. Match on a port range: ------------------------- $ tc filter add dev enp4s0 protocol ip parent ffff:\ prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\ action drop $ tc -s filter show dev enp4s0 parent ffff: filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 ip_proto tcp dst_port range 20-30 skip_hw not_in_hw action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 85 sec used 3 sec Action statistics: Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) backlog 0b 0p requeues 0 2. Match on IP address and port range: -------------------------------------- $ tc filter add dev enp4s0 protocol ip parent ffff:\ prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\ skip_hw action drop $ tc -s filter show dev enp4s0 parent ffff: filter protocol ip pref 1 flower chain 0 handle 0x2 eth_type ipv4 ip_proto tcp dst_ip 192.168.1.1 dst_port range 100-200 skip_hw not_in_hw action order 1: gact action drop random type none pass val 0 index 2 ref 1 bind 1 installed 58 sec used 2 sec Action statistics: Sent 920 bytes 20 pkt (dropped 20, overlimits 0 requeues 0) backlog 0b 0p requeues 0 v4: 1. Added condition before setting port key. 2. Organized setting and dumping port range keys into functions and added validation of input range. v3: 1. Moved new fields in UAPI enum to the end of enum. 2. Removed couple of empty lines. v2: Addressed Jiri's comments: 1. Added separate functions for dst and src comparisons. 2. Removed endpoint enum. 3. Added new bit TCA_FLOWER_FLAGS_RANGE to decide normal/range lookup. 4. Cleaned up fl_lookup function. Signed-off-by: Amritha Nambiar Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 401d0c1e612d..95d0db2a8350 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -485,6 +485,11 @@ enum { TCA_FLOWER_IN_HW_COUNT, + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */ + __TCA_FLOWER_MAX, }; @@ -518,6 +523,8 @@ enum { TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; +#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */ + /* Match-all classifier */ enum { -- cgit v1.2.3 From 80e22e961dfd15530215f6f6dcd94cd8f65ba1ea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Nov 2018 22:23:49 -0800 Subject: net: sched: gred: provide a better structured dump and expose stats Currently all GRED's virtual queue data is dumped in a single array in a single attribute. This makes it pretty much impossible to add new fields. In order to expose more detailed stats add a new set of attributes. We can now expose the 64 bit value of bytesin and all the mark stats which were not part of the original design. Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ee017bc057a3..c8f717346b60 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -291,11 +291,37 @@ enum { TCA_GRED_DPS, TCA_GRED_MAX_P, TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + struct tc_gred_qopt { __u32 limit; /* HARD maximal queue length (bytes) */ __u32 qth_min; /* Min average length threshold (bytes) */ -- cgit v1.2.3 From 72111015024f4eddb5aac400ddbe38a4f8f0279a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Nov 2018 22:23:51 -0800 Subject: net: sched: gred: allow manipulating per-DP RED flags Allow users to set and dump RED flags (ECN enabled and harddrop) on per-virtual queue basis. Validation of attributes is split from changes to make sure we won't have to undo previous operations when we find out configuration is invalid. The objective is to allow changing per-Qdisc parameters without overwriting the per-vq configured flags. Old user space will not pass the TCA_GRED_VQ_FLAGS attribute and per-Qdisc flags will always get propagated to the virtual queues. New user space which wants to make use of per-vq flags should set per-Qdisc flags to 0 and then configure per-vq flags as it sees fit. Once per-vq flags are set per-Qdisc flags can't be changed to non-zero. Vice versa - if the per-Qdisc flags are non-zero the TCA_GRED_VQ_FLAGS attribute has to either be omitted or set to the same value as per-Qdisc flags. Update per-Qdisc parameters: per-Qdisc | per-VQ | result 0 | 0 | all vq flags updated 0 | non-0 | error (vq flags in use) non-0 | 0 | -- impossible -- non-0 | non-0 | all vq flags updated Update per-VQ state (flags parameter not specified): no change to flags Update per-VQ state (flags parameter set): per-Qdisc | per-VQ | result 0 | any | per-vq flags updated non-0 | 0 | -- impossible -- non-0 | non-0 | error (per-Qdisc flags in use) Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index c8f717346b60..0d18b1d1fbbc 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -317,6 +317,7 @@ enum { TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ TCA_GRED_VQ_STAT_PDROP, /* u32 */ TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ __TCA_GRED_VQ_MAX }; -- cgit v1.2.3 From 54e8cb786130949a4d37792383cb528176771e5d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Nov 2018 15:26:51 -0800 Subject: uapi/ethtool: fix spelling errors Trivial spelling errors found by codespell. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c8f8e2455bf3..17be76aeb468 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -882,7 +882,7 @@ struct ethtool_rx_flow_spec { __u32 location; }; -/* How rings are layed out when accessing virtual functions or +/* How rings are laid out when accessing virtual functions or * offloaded queues is device specific. To allow users to do flow * steering and specify these queues the ring cookie is partitioned * into a 32bit queue index with an 8 bit virtual function id. @@ -891,7 +891,7 @@ struct ethtool_rx_flow_spec { * devices start supporting PCIe w/ARI. However at the moment I * do not know of any devices that support this so I do not reserve * space for this at this time. If a future patch consumes the next - * byte it should be aware of this possiblity. + * byte it should be aware of this possibility. */ #define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL #define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL -- cgit v1.2.3 From e8bd8fca6773ef49390269bd467bf940a0841ccf Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 15 Nov 2018 16:44:12 -0800 Subject: tcp: add SRTT to SCM_TIMESTAMPING_OPT_STATS Add TCP_NLA_SRTT to SCM_TIMESTAMPING_OPT_STATS that reports the smoothed round trip time in microseconds (tcp_sock.srtt_us >> 3). Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index e02d31986ff9..8bb6cc5f3235 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -266,6 +266,7 @@ enum { TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ TCP_NLA_REORD_SEEN, /* reordering events seen */ + TCP_NLA_SRTT, /* smoothed RTT in usecs */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 8d951a75d022d94a05f5fa74217670a981e8302d Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Fri, 16 Nov 2018 15:51:59 +1100 Subject: net/ncsi: Configure multi-package, multi-channel modes with failover This patch extends the ncsi-netlink interface with two new commands and three new attributes to configure multiple packages and/or channels at once, and configure specific failover modes. NCSI_CMD_SET_PACKAGE mask and NCSI_CMD_SET_CHANNEL_MASK set a whitelist of packages or channels allowed to be configured with the NCSI_ATTR_PACKAGE_MASK and NCSI_ATTR_CHANNEL_MASK attributes respectively. If one of these whitelists is set only packages or channels matching the whitelist are considered for the channel queue in ncsi_choose_active_channel(). These commands may also use the NCSI_ATTR_MULTI_FLAG to signal that multiple packages or channels may be configured simultaneously. NCSI hardware arbitration (HWA) must be available in order to enable multi-package mode. Multi-channel mode is always available. If the NCSI_ATTR_CHANNEL_ID attribute is present in the NCSI_CMD_SET_CHANNEL_MASK command the it sets the preferred channel as with the NCSI_CMD_SET_INTERFACE command. The combination of preferred channel and channel whitelist defines a primary channel and the allowed failover channels. If the NCSI_ATTR_MULTI_FLAG attribute is also present then the preferred channel is configured for Tx/Rx and the other channels are enabled only for Rx. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/uapi/linux/ncsi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h index 0a26a5576645..a3f87c54fdb3 100644 --- a/include/uapi/linux/ncsi.h +++ b/include/uapi/linux/ncsi.h @@ -26,6 +26,12 @@ * @NCSI_CMD_SEND_CMD: send NC-SI command to network card. * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID * and NCSI_ATTR_CHANNEL_ID. + * @NCSI_CMD_SET_PACKAGE_MASK: set a whitelist of allowed packages. + * Requires NCSI_ATTR_IFINDEX and NCSI_ATTR_PACKAGE_MASK. + * @NCSI_CMD_SET_CHANNEL_MASK: set a whitelist of allowed channels. + * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID, and + * NCSI_ATTR_CHANNEL_MASK. If NCSI_ATTR_CHANNEL_ID is present it sets + * the primary channel. * @NCSI_CMD_MAX: highest command number */ enum ncsi_nl_commands { @@ -34,6 +40,8 @@ enum ncsi_nl_commands { NCSI_CMD_SET_INTERFACE, NCSI_CMD_CLEAR_INTERFACE, NCSI_CMD_SEND_CMD, + NCSI_CMD_SET_PACKAGE_MASK, + NCSI_CMD_SET_CHANNEL_MASK, __NCSI_CMD_AFTER_LAST, NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1 @@ -48,6 +56,10 @@ enum ncsi_nl_commands { * @NCSI_ATTR_PACKAGE_ID: package ID * @NCSI_ATTR_CHANNEL_ID: channel ID * @NCSI_ATTR_DATA: command payload + * @NCSI_ATTR_MULTI_FLAG: flag to signal that multi-mode should be enabled with + * NCSI_CMD_SET_PACKAGE_MASK or NCSI_CMD_SET_CHANNEL_MASK. + * @NCSI_ATTR_PACKAGE_MASK: 32-bit mask of allowed packages. + * @NCSI_ATTR_CHANNEL_MASK: 32-bit mask of allowed channels. * @NCSI_ATTR_MAX: highest attribute number */ enum ncsi_nl_attrs { @@ -57,6 +69,9 @@ enum ncsi_nl_attrs { NCSI_ATTR_PACKAGE_ID, NCSI_ATTR_CHANNEL_ID, NCSI_ATTR_DATA, + NCSI_ATTR_MULTI_FLAG, + NCSI_ATTR_PACKAGE_MASK, + NCSI_ATTR_CHANNEL_MASK, __NCSI_ATTR_AFTER_LAST, NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1 -- cgit v1.2.3 From 2cc0eeb67636e0339ad7b6cdfa305f63983642af Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 16:08:51 +0800 Subject: sctp: define subscribe in sctp_sock as __u16 The member subscribe in sctp_sock is used to indicate to which of the events it is subscribed, more like a group of flags. So it's better to be defined as __u16 (2 bytpes), instead of struct sctp_event_subscribe (13 bytes). Note that sctp_event_subscribe is an UAPI struct, used on sockopt calls, and thus it will not be removed. This patch only changes the internal storage of the flags. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c81feb373d3e..66afa5b4ab6b 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -632,7 +632,9 @@ union sctp_notification { */ enum sctp_sn_type { - SCTP_SN_TYPE_BASE = (1<<15), + SCTP_SN_TYPE_BASE = (1<<15), + SCTP_DATA_IO_EVENT = SCTP_SN_TYPE_BASE, +#define SCTP_DATA_IO_EVENT SCTP_DATA_IO_EVENT SCTP_ASSOC_CHANGE, #define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE SCTP_PEER_ADDR_CHANGE, @@ -657,6 +659,8 @@ enum sctp_sn_type { #define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT SCTP_STREAM_CHANGE_EVENT, #define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT + SCTP_SN_TYPE_MAX = SCTP_STREAM_CHANGE_EVENT, +#define SCTP_SN_TYPE_MAX SCTP_SN_TYPE_MAX }; /* Notification error codes used to fill up the error fields in some -- cgit v1.2.3 From 480ba9c18a27ff77b02a2012e50dfd3e20ee9f7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 16:08:54 +0800 Subject: sctp: add sockopt SCTP_EVENT This patch adds sockopt SCTP_EVENT described in rfc6525#section-6.2. With this sockopt users can subscribe to an event from a specified asoc. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 66afa5b4ab6b..d584073532b8 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -129,6 +129,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_STREAM_SCHEDULER_VALUE 124 #define SCTP_INTERLEAVING_SUPPORTED 125 #define SCTP_SENDMSG_CONNECT 126 +#define SCTP_EVENT 127 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1154,6 +1155,12 @@ struct sctp_add_streams { uint16_t sas_outstrms; }; +struct sctp_event { + sctp_assoc_t se_assoc_id; + uint16_t se_type; + uint8_t se_on; +}; + /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, -- cgit v1.2.3 From 96b3b6c9091d23289721350e32c63cc8749686be Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:08 +0000 Subject: bpf: allow zero-initializing hash map seed Add a new flag BPF_F_ZERO_SEED, which forces a hash map to initialize the seed to zero. This is useful when doing performance analysis both on individual BPF programs, as well as the kernel's hash table implementation. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 47d606d744cc..8c01b89a4cb4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -269,6 +269,9 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3 From 2f1833607aed6a9c1e1729bf0e2588c341ceb409 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:09 +0000 Subject: bpf: move BPF_F_QUERY_EFFECTIVE after map flags BPF_F_QUERY_EFFECTIVE is in the middle of the flags valid for BPF_MAP_CREATE. Move it to its own section to reduce confusion. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8c01b89a4cb4..05d95290b848 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -257,9 +257,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -272,6 +269,9 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3 From 2667a2626f4da370409c2830552f6e8c8b8c41e2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Nov 2018 15:29:08 -0800 Subject: bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO to support the function debug info. BTF_KIND_FUNC_PROTO must not have a name (i.e. !t->name_off) and it is followed by >= 0 'struct bpf_param' objects to describe the function arguments. The BTF_KIND_FUNC must have a valid name and it must refer back to a BTF_KIND_FUNC_PROTO. The above is the conclusion after the discussion between Edward Cree, Alexei, Daniel, Yonghong and Martin. By combining BTF_KIND_FUNC and BTF_LIND_FUNC_PROTO, a complete function signature can be obtained. It will be used in the later patches to learn the function signature of a running bpf program. Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/btf.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 972265f32871..14f66948fc95 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -40,7 +40,8 @@ struct btf_type { /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -64,8 +65,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -110,4 +113,13 @@ struct btf_member { __u32 offset; /* offset in bits */ }; +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From 838e96904ff3fc6c30e5ebbc611474669856e3c0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:11 -0800 Subject: bpf: Introduce bpf_func_info This patch added interface to load a program with the following additional information: . prog_btf_fd . func_info, func_info_rec_size and func_info_cnt where func_info will provide function range and type_id corresponding to each function. The func_info_rec_size is introduced in the UAPI to specify struct bpf_func_info size passed from user space. This intends to make bpf_func_info structure growable in the future. If the kernel gets a different bpf_func_info size from userspace, it will try to handle user request with part of bpf_func_info it can understand. In this patch, kernel can understand struct bpf_func_info { __u32 insn_offset; __u32 type_id; }; If user passed a bpf func_info record size of 16 bytes, the kernel can still handle part of records with the above definition. If verifier agrees with function range provided by the user, the bpf_prog ksym for each function will use the func name provided in the type_id, which is supposed to provide better encoding as it is not limited by 16 bytes program name limitation and this is better for bpf program which contains multiple subprograms. The bpf_prog_info interface is also extended to return btf_id, func_info, func_info_rec_size and func_info_cnt to userspace, so userspace can print out the function prototype for each xlated function. The insn_offset in the returned func_info corresponds to the insn offset for xlated functions. With other jit related fields in bpf_prog_info, userspace can also print out function prototypes for each jited function. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 05d95290b848..c1554aa07465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -338,6 +338,10 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2638,6 +2642,10 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 func_info_cnt; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2949,4 +2957,9 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_offset; + __u32 type_id; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From f11216b24219ab26d8d159fbfa12dff886b16e32 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Thu, 22 Nov 2018 14:39:16 -0500 Subject: bpf: add skb->tstamp r/w access from tc clsact and cg skb progs This could be used to rate limit egress traffic in concert with a qdisc which supports Earliest Departure Time, such as FQ. Write access from cg skb progs only with CAP_SYS_ADMIN, since the value will be used by downstream qdiscs. It might make sense to relax this. Changes v1 -> v2: - allow access from cg skb, write only with CAP_SYS_ADMIN Signed-off-by: Vlad Dumitrescu Acked-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 89a9157e1253bb3384a7596cb51bf1ceeac063ed Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:18 +0800 Subject: virtio: add packed ring types and macros Add types and macros for packed ring. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- include/uapi/linux/virtio_config.h | 3 +++ include/uapi/linux/virtio_ring.h | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 449132c76b1c..1196e1c1d4f6 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -75,6 +75,9 @@ */ #define VIRTIO_F_IOMMU_PLATFORM 33 +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 6d5d5faa989b..2414f8af26b3 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -44,6 +44,13 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -53,6 +60,23 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 @@ -171,4 +195,32 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __le16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __le16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __le64 addr; + /* Buffer Length. */ + __le32 len; + /* Buffer ID. */ + __le16 id; + /* The flags depending on descriptor type. */ + __le16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _UAPI_LINUX_VIRTIO_RING_H */ -- cgit v1.2.3 From a428afe82f98d2ffb31c981671630df1fa25906f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 24 Nov 2018 04:34:20 +0200 Subject: net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. [1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 18 ++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e41eda3c71f1..6dc02c03bdf8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -292,4 +292,22 @@ struct br_mcast_stats { __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; __u64 mcast_packets[BR_MCAST_DIR_SIZE]; }; + +/* bridge boolean options + * IMPORTANT: if adding a new option do not forget to handle + * it in br_boolopt_toggle/get and bridge sysfs + */ +enum br_boolopt_id { + BR_BOOLOPT_MAX +}; + +/* struct br_boolopt_multi - change multiple bridge boolean options + * + * @optval: new option values (bit per option) + * @optmask: options to change (bit per option) + */ +struct br_boolopt_multi { + __u32 optval; + __u32 optmask; +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f42c069d81db..d6533828123a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -288,6 +288,7 @@ enum { IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, + IFLA_BR_MULTI_BOOLOPT, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 70e4272b4c81828e7d942209bae83b9d92752cfe Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 24 Nov 2018 04:34:21 +0200 Subject: net: bridge: add no_linklocal_learn bool option Use the new boolopt API to add an option which disables learning from link-local packets. The default is kept as before and learning is enabled. This is a simple map from a boolopt bit to a bridge private flag that is tested before learning. v2: pass NULL for extack via sysfs Signed-off-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6dc02c03bdf8..773e476a8e54 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -294,10 +294,13 @@ struct br_mcast_stats { }; /* bridge boolean options + * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { + BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From cff478b9d9ccaee0de0e02700c63addf007b5d3c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Nov 2018 15:42:04 +0100 Subject: netns: add support of NETNSA_TARGET_NSID Like it was done for link and address, add the ability to perform get/dump in another netns by specifying a target nsid attribute. Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h index 0187c74d8889..0ed9dd61d32a 100644 --- a/include/uapi/linux/net_namespace.h +++ b/include/uapi/linux/net_namespace.h @@ -16,6 +16,7 @@ enum { NETNSA_NSID, NETNSA_PID, NETNSA_FD, + NETNSA_TARGET_NSID, __NETNSA_MAX, }; -- cgit v1.2.3 From 288f06a001eb6265122c620295b68a0dd53d1482 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Nov 2018 15:42:06 +0100 Subject: netns: enable to dump full nsid translation table Like the previous patch, the goal is to ease to convert nsids from one netns to another netns. A new attribute (NETNSA_CURRENT_NSID) is added to the kernel answer when NETNSA_TARGET_NSID is provided, thus the user can easily convert nsids. Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h index 0ed9dd61d32a..9f9956809565 100644 --- a/include/uapi/linux/net_namespace.h +++ b/include/uapi/linux/net_namespace.h @@ -17,6 +17,7 @@ enum { NETNSA_PID, NETNSA_FD, NETNSA_TARGET_NSID, + NETNSA_CURRENT_NSID, __NETNSA_MAX, }; -- cgit v1.2.3 From 7246d8ed4dcce23f7509949a77be15fa9f0e3d28 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 26 Nov 2018 14:16:17 -0800 Subject: bpf: helper to pop data from messages This adds a BPF SK_MSG program helper so that we can pop data from a msg. We use this to pop metadata from a previous push data call. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 23e2031a43d4..597afdbc1ab9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2268,6 +2268,19 @@ union bpf_attr { * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of msg + * payload and/or *pop* value being to large. + * + * Return + * 0 on success, or a negative erro in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2373,8 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 4f693b55c3d2d2239b8a0094b518a1e533cf75d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Nov 2018 14:42:03 -0800 Subject: tcp: implement coalescing on backlog queue In case GRO is not as efficient as it should be or disabled, we might have a user thread trapped in __release_sock() while softirq handler flood packets up to the point we have to drop. This patch balances work done from user thread and softirq, to give more chances to __release_sock() to complete its work before new packets are added the the backlog. This also helps if we receive many ACK packets, since GRO does not aggregate them. This patch brings ~60% throughput increase on a receiver without GRO, but the spectacular gain is really on 1000x release_sock() latency reduction I have measured. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index f80135e5feaa..86dc24a96c90 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -243,6 +243,7 @@ enum LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */ LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */ + LINUX_MIB_TCPBACKLOGCOALESCE, /* TCPBacklogCoalesce */ LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */ LINUX_MIB_TCPOFODROP, /* TCPOFODrop */ LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ -- cgit v1.2.3 From 26d31925cd5ea4b5b168ed538b0326d63ccbb384 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 28 Nov 2018 19:12:56 +0100 Subject: tun: implement carrier change The userspace may need to control the carrier state. Signed-off-by: Nicolas Dichtel Signed-off-by: Didier Pallard Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index ee432cd3018c..23a6753b37df 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -59,6 +59,7 @@ #define TUNGETVNETBE _IOR('T', 223, int) #define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETFILTEREBPF _IOR('T', 225, int) +#define TUNSETCARRIER _IOW('T', 226, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From e9ee9efc0d176512cdce9d27ff8549d7ffa2bfcd Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:14 -0800 Subject: bpf: Add BPF_F_ANY_ALIGNMENT. Often we want to write tests cases that check things like bad context offset accesses. And one way to do this is to use an odd offset on, for example, a 32-bit load. This unfortunately triggers the alignment checks first on platforms that do not set CONFIG_EFFICIENT_UNALIGNED_ACCESS. So the test case see the alignment failure rather than what it was testing for. It is often not completely possible to respect the original intention of the test, or even test the same exact thing, while solving the alignment issue. Another option could have been to check the alignment after the context and other validations are performed by the verifier, but that is a non-trivial change to the verifier. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 597afdbc1ab9..8050caea7495 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -232,6 +232,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 -- cgit v1.2.3 From c3e9305983597a61083482581e83f0bd77ba306a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 12 Nov 2018 16:26:44 +0100 Subject: netfilter: remove NFC_* cache bits These are very very (for long time unused) caching infrastructure definition, remove then. They have nothing to do with the NFC subsystem. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter.h | 4 ---- include/uapi/linux/netfilter_decnet.h | 10 ---------- include/uapi/linux/netfilter_ipv4.h | 28 ---------------------------- include/uapi/linux/netfilter_ipv6.h | 29 ----------------------------- 4 files changed, 71 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index cca10e767cd8..ca9e63d6e0e4 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -34,10 +34,6 @@ /* only for userspace compatibility */ #ifndef __KERNEL__ -/* Generic cache responses from hook functions. - <= 0x2000 is used for protocol-flags. */ -#define NFC_UNKNOWN 0x4000 -#define NFC_ALTERED 0x8000 /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ #define NF_VERDICT_BITS 16 diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h index 61f1c7dfd033..3c77f54560f2 100644 --- a/include/uapi/linux/netfilter_decnet.h +++ b/include/uapi/linux/netfilter_decnet.h @@ -15,16 +15,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_DN_SRC 0x0001 -/* Dest IP address. */ -#define NFC_DN_DST 0x0002 -/* Input device. */ -#define NFC_DN_IF_IN 0x0004 -/* Output device. */ -#define NFC_DN_IF_OUT 0x0008 - /* kernel define is in netfilter_defs.h */ #define NF_DN_NUMHOOKS 7 #endif /* ! __KERNEL__ */ diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index c3b060775e13..155e77d6a42d 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -13,34 +13,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP_DST 0x0002 -/* Input device. */ -#define NFC_IP_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP_TOS 0x0010 -/* Protocol. */ -#define NFC_IP_PROTO 0x0020 -/* IP options. */ -#define NFC_IP_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP_FRAG 0x0080 - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP_PROTO_UNKNOWN 0x2000 - /* IP Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP_PRE_ROUTING 0 diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index dc624fd24d25..80aa9b0799af 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -16,35 +16,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP6_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP6_DST 0x0002 -/* Input device. */ -#define NFC_IP6_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP6_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP6_TOS 0x0010 -/* Protocol. */ -#define NFC_IP6_PROTO 0x0020 -/* IP options. */ -#define NFC_IP6_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP6_FRAG 0x0080 - - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP6_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP6_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP6_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP6_PROTO_UNKNOWN 0x2000 - /* IP6 Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP6_PRE_ROUTING 0 -- cgit v1.2.3 From e3da08d057002f9d0831949d51666c3e15dc6b29 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Sun, 2 Dec 2018 20:18:19 -0500 Subject: bpf: allow BPF read access to qdisc pkt_len The pkt_len field in qdisc_skb_cb stores the skb length as it will appear on the wire after segmentation. For byte accounting, this value is more accurate than skb->len. It is computed on entry to the TC layer, so only valid there. Allow read access to this field from BPF tc classifier and action programs. The implementation is analogous to tc_classid, aside from restricting to read access. To distinguish it from skb->len and self-describe export as wire_len. Changes v1->v2 - Rename pkt_len to wire_len Signed-off-by: Petar Penkov Signed-off-by: Vlad Dumitrescu Signed-off-by: Willem de Bruijn Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8050caea7495..0183b8e70a9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2497,6 +2497,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 90b1023f68c78b9b85e364250155776c8e421176 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 3 Dec 2018 12:13:35 +0000 Subject: bpf: fix documentation for eBPF helpers The missing indentation on the "Return" sections for bpf_map_pop_elem() and bpf_map_peek_elem() helpers break RST and man pages generation. This patch fixes them, and moves the description of those two helpers towards the end of the list (even though they are somehow related to the three first helpers for maps, the man page explicitly states that the helpers are sorted in chronological order). While at it, bring other minor formatting edits for eBPF helpers documentation: mostly blank lines removal, RST formatting, or other small nits for consistency. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 90 ++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0183b8e70a9e..572eb2d42768 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -496,18 +496,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1931,9 +1919,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1942,9 +1930,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2089,8 +2077,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. @@ -2173,21 +2161,22 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * @@ -2195,7 +2184,7 @@ union bpf_attr { * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2221,15 +2210,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * A pointer to *struct bpf_sock*, or **NULL** in case of failure. + * For sockets with reuseport option, **struct bpf_sock** + * return is from **reuse->socks**\ [] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2255,46 +2244,57 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * A pointer to **struct bpf_sock**, or **NULL** in case of + * failure. For sockets with reuseport option, **struct bpf_sock** + * return is from **reuse->socks**\ [] using hash of the packet. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. - * * Return * 0 on success, or a negative error in case of failure. * * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) - * Description + * Description * Will remove *pop* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation * if possible. Other errors can occur if input parameters are - * invalid either due to *start* byte not being valid part of msg + * invalid either due to *start* byte not being valid part of *msg* * payload and/or *pop* value being to large. - * * Return - * 0 on success, or a negative erro in case of failure. + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3 From 846e980a87fc30075517d6d979548294d5461bdb Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Mon, 3 Dec 2018 07:58:59 +0000 Subject: devlink: Add 'fw_load_policy' generic parameter Many drivers load the device's firmware image during the initialization flow either from the flash or from the disk. Currently this option is not controlled by the user and the driver decides from where to load the firmware image. 'fw_load_policy' gives the ability to control this option which allows the user to choose between different loading policies supported by the driver. This parameter can be useful while testing and/or debugging the device. For example, testing a firmware bug fix. Signed-off-by: Shalom Toledo Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 79407bbd296d..6e52d3660654 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -163,6 +163,11 @@ enum devlink_param_cmode { DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1 }; +enum devlink_param_fw_load_policy_value { + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER, + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, -- cgit v1.2.3 From b5a36b1e1b138285ea0df34bf96c759e1e30fafd Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 3 Dec 2018 11:31:23 +0000 Subject: bpf: respect size hint to BPF_PROG_TEST_RUN if present Use data_size_out as a size hint when copying test output to user space. ENOSPC is returned if the output buffer is too small. Callers which so far did not set data_size_out are not affected. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 572eb2d42768..c8e1eeee2c5f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -374,8 +374,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; -- cgit v1.2.3 From cc1068eb6ad21a6cf54aa5f9ae25bf50fd5c9d4b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Nov 2018 15:25:04 -0800 Subject: uapi/nl80211: fix spelling errors Spelling errors found by codespell Signed-off-by: Stephen Hemminger Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 51bd85b7d839..2b53c0e949c7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1734,7 +1734,7 @@ enum nl80211_commands { * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID * is included in the probe request, but the match attributes * will never let it go through), -EINVAL may be returned. - * If ommited, no filtering is done. + * If omitted, no filtering is done. * * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. In each nested item, it contains attributes @@ -1839,7 +1839,7 @@ enum nl80211_commands { * * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be * used by the drivers which has MLME in firmware and does not have support - * to report per station tx/rx activity to free up the staion entry from + * to report per station tx/rx activity to free up the station entry from * the list. This needs to be used when the driver advertises the * capability to timeout the stations. * @@ -2200,7 +2200,7 @@ enum nl80211_commands { * * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in * the specified band is to be adjusted before doing - * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparision to figure out + * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out * better BSSs. The attribute value is a packed structure * value as specified by &struct nl80211_bss_select_rssi_adjust. * @@ -4910,7 +4910,7 @@ enum nl80211_iface_limit_attrs { * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 * => allows a STA plus three P2P interfaces * - * The list of these four possiblities could completely be contained + * The list of these four possibilities could completely be contained * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate * that any of these groups must match. * @@ -4940,7 +4940,7 @@ enum nl80211_if_combination_attrs { * enum nl80211_plink_state - state of a mesh peer link finite state machine * * @NL80211_PLINK_LISTEN: initial state, considered the implicit - * state of non existant mesh peer links + * state of non existent mesh peer links * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to * this mesh peer * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received @@ -5432,7 +5432,7 @@ enum nl80211_timeout_reason { * request parameters IE in the probe request * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at - * rate of at least 5.5M. In case non OCE AP is dicovered in the channel, + * rate of at least 5.5M. In case non OCE AP is discovered in the channel, * only the first probe req in the channel will be sent in high rate. * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request * tx deferral (dot11FILSProbeDelay shall be set to 15ms) -- cgit v1.2.3 From d30d42e08c76cb9323ec6121190eb026b07f773b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 5 Dec 2018 17:35:44 -0800 Subject: bpf: Change insn_offset to insn_off in bpf_func_info The later patch will introduce "struct bpf_line_info" which has member "line_off" and "file_off" referring back to the string section in btf. The line_"off" and file_"off" are more consistent to the naming convention in btf.h that means "offset" (e.g. name_off in "struct btf_type"). The to-be-added "struct bpf_line_info" also has another member, "insn_off" which is the same as the "insn_offset" in "struct bpf_func_info". Hence, this patch renames "insn_offset" to "insn_off" for "struct bpf_func_info". Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8e1eeee2c5f..a84fd232d934 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2991,7 +2991,7 @@ struct bpf_flow_keys { }; struct bpf_func_info { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; -- cgit v1.2.3 From c454a46b5efd8eff8880e88ece2976e60a26bf35 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:25 -0800 Subject: bpf: Add bpf_line_info support This patch adds bpf_line_info support. It accepts an array of bpf_line_info objects during BPF_PROG_LOAD. The "line_info", "line_info_cnt" and "line_info_rec_size" are added to the "union bpf_attr". The "line_info_rec_size" makes bpf_line_info extensible in the future. The new "check_btf_line()" ensures the userspace line_info is valid for the kernel to use. When the verifier is translating/patching the bpf_prog (through "bpf_patch_insn_single()"), the line_infos' insn_off is also adjusted by the newly added "bpf_adj_linfo()". If the bpf_prog is jited, this patch also provides the jited addrs (in aux->jited_linfo) for the corresponding line_info.insn_off. "bpf_prog_fill_jited_linfo()" is added to fill the aux->jited_linfo. It is currently called by the x86 jit. Other jits can also use "bpf_prog_fill_jited_linfo()" and it will be done in the followup patches. In the future, if it deemed necessary, a particular jit could also provide its own "bpf_prog_fill_jited_linfo()" implementation. A few "*line_info*" fields are added to the bpf_prog_info such that the user can get the xlated line_info back (i.e. the line_info with its insn_off reflecting the translated prog). The jited_line_info is available if the prog is jited. It is an array of __u64. If the prog is not jited, jited_line_info_cnt is 0. The verifier's verbose log with line_info will be done in a follow up patch. Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a84fd232d934..7a66db8d15d5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -356,6 +356,9 @@ union bpf_attr { __u32 func_info_rec_size; /* userspace bpf_func_info size */ __aligned_u64 func_info; /* func info */ __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2679,6 +2682,12 @@ struct bpf_prog_info { __u32 func_info_rec_size; __aligned_u64 func_info; __u32 func_info_cnt; + __u32 line_info_cnt; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 jited_line_info_cnt; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2995,4 +3004,14 @@ struct bpf_func_info { __u32 type_id; }; +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 01d3240a04f4c09392e13c77b54d4423ebce2d72 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 6 Dec 2018 13:01:03 +0000 Subject: media: bpf: add bpf function to report mouse movement Some IR remotes have a directional pad or other pointer-like thing that can be used as a mouse. Make it possible to decode these types of IR protocols in BPF. Cc: netdev@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a66db8d15d5..1bee1135866a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2301,6 +2301,20 @@ union bpf_attr { * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2394,7 +2408,8 @@ union bpf_attr { FN(map_pop_elem), \ FN(map_peek_elem), \ FN(msg_push_data), \ - FN(msg_pop_data), + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 11d8b82d2222cade12caad2c125f23023777dcbc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 10 Dec 2018 14:14:08 -0800 Subject: bpf: rename *_info_cnt to nr_*_info in bpf_prog_info In uapi bpf.h, currently we have the following fields in the struct bpf_prog_info: __u32 func_info_cnt; __u32 line_info_cnt; __u32 jited_line_info_cnt; The above field names "func_info_cnt" and "line_info_cnt" also appear in union bpf_attr for program loading. The original intention is to keep the names the same between bpf_prog_info and bpf_attr so it will imply what we returned to user space will be the same as what the user space passed to the kernel. Such a naming convention in bpf_prog_info is not consistent with other fields like: __u32 nr_jited_ksyms; __u32 nr_jited_func_lens; This patch made this adjustment so in bpf_prog_info newly introduced *_info_cnt becomes nr_*_info. Acked-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bee1135866a..f943ed803309 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2696,11 +2696,11 @@ struct bpf_prog_info { __u32 btf_id; __u32 func_info_rec_size; __aligned_u64 func_info; - __u32 func_info_cnt; - __u32 line_info_cnt; + __u32 nr_func_info; + __u32 nr_line_info; __aligned_u64 line_info; __aligned_u64 jited_line_info; - __u32 jited_line_info_cnt; + __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); -- cgit v1.2.3 From 0bd72117fba2dd51a65eaa7b480adc0eea9a4409 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Dec 2018 10:26:33 +0100 Subject: bpf: fix up uapi helper description and sync bpf header with tools Minor markup fixup from bpf-next into net-next merge in the BPF helper description of bpf_sk_lookup_tcp() and bpf_sk_lookup_udp(). Also sync up the copy of bpf.h from tooling infrastructure. Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 92e962ba0c47..aa582cd5bfcf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2218,9 +2218,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2254,9 +2254,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sock) * Description -- cgit v1.2.3 From c872bdb38febb4c31ece3599c52cf1f833b89f4e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Dec 2018 09:37:46 -0800 Subject: bpf: include sub program tags in bpf_prog_info Changes v2 -> v3: 1. remove check for bpf_dump_raw_ok(). Changes v1 -> v2: 1. Fix error path as Martin suggested. This patch adds nr_prog_tags and prog_tags to bpf_prog_info. This is a reliable way for user space to get tags of all sub programs. Before this patch, user space need to find sub program tags via kallsyms. This feature will be used in BPF introspection, where user space queries information about BPF programs via sys_bpf. Signed-off-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa582cd5bfcf..e7d57e89f25f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2717,6 +2717,8 @@ struct bpf_prog_info { __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From df9b0e30d44c901ac27c0f38cd54511b3f130c6d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 15 Dec 2018 14:09:06 -0800 Subject: neighbor: Add protocol attribute Similar to routes and rules, add protocol attribute to neighbor entries for easier tracking of how each was created. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 998155444e0d..cd144e3099a3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -28,6 +28,7 @@ enum { NDA_MASTER, NDA_LINK_NETNSID, NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ __NDA_MAX }; -- cgit v1.2.3 From 6c4fc209fcf9d27efbaa48368773e4d2bfbd59aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Dec 2018 00:49:47 +0100 Subject: bpf: remove useless version check for prog load Existing libraries and tracing frameworks work around this kernel version check by automatically deriving the kernel version from uname(3) or similar such that the user does not need to do it manually; these workarounds also make the version check useless at the same time. Moreover, most other BPF tracing types enabling bpf_probe_read()-like functionality have /not/ adapted this check, and in general these days it is well understood anyway that all the tracing programs are not stable with regards to future kernels as kernel internal data structures are subject to change from release to release. Back at last netconf we discussed [0] and agreed to remove this check from bpf_prog_load() and instead document it here in the uapi header that there is no such guarantee for stable API for these programs. [0] http://vger.kernel.org/netconf2018_files/DanielBorkmann_netconf2018.pdf Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e7d57e89f25f..1d324c2cbca2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -343,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ -- cgit v1.2.3 From 9d5f9f701b1891466fb3dbb1806ad97716f95cc3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:51 -0800 Subject: bpf: btf: fix struct/union/fwd types with kind_flag This patch fixed two issues with BTF. One is related to struct/union bitfield encoding and the other is related to forward type. Issue #1 and solution: ====================== Current btf encoding of bitfield follows what pahole generates. For each bitfield, pahole will duplicate the type chain and put the bitfield size at the final int or enum type. Since the BTF enum type cannot encode bit size, pahole workarounds the issue by generating an int type whenever the enum bit size is not 32. For example, -bash-4.4$ cat t.c typedef int ___int; enum A { A1, A2, A3 }; struct t { int a[5]; ___int b:4; volatile enum A c:4; } g; -bash-4.4$ gcc -c -O2 -g t.c The current kernel supports the following BTF encoding: $ pahole -JV t.o [1] TYPEDEF ___int type_id=2 [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [3] ENUM A size=4 vlen=3 A1 val=0 A2 val=1 A3 val=2 [4] STRUCT t size=24 vlen=3 a type_id=5 bits_offset=0 b type_id=9 bits_offset=160 c type_id=11 bits_offset=164 [5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5 [6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none) [7] VOLATILE (anon) type_id=3 [8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none) [9] TYPEDEF ___int type_id=8 [10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED [11] VOLATILE (anon) type_id=10 Two issues are in the above: . by changing enum type to int, we lost the original type information and this will not be ideal later when we try to convert BTF to a header file. . the type duplication for bitfields will cause BTF bloat. Duplicated types cannot be deduplicated later if the bitfield size is different. To fix this issue, this patch implemented a compatible change for BTF struct type encoding: . the bit 31 of struct_type->info, previously reserved, now is used to indicate whether bitfield_size is encoded in btf_member or not. . if bit 31 of struct_type->info is set, btf_member->offset will encode like: bit 0 - 23: bit offset bit 24 - 31: bitfield size if bit 31 is not set, the old behavior is preserved: bit 0 - 31: bit offset So if the struct contains a bit field, the maximum bit offset will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum bitfield size will be 256 which is enough for today as maximum bitfield in compiler can be 128 where int128 type is supported. This kernel patch intends to support the new BTF encoding: $ pahole -JV t.o [1] TYPEDEF ___int type_id=2 [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [3] ENUM A size=4 vlen=3 A1 val=0 A2 val=1 A3 val=2 [4] STRUCT t kind_flag=1 size=24 vlen=3 a type_id=5 bitfield_size=0 bits_offset=0 b type_id=1 bitfield_size=4 bits_offset=160 c type_id=7 bitfield_size=4 bits_offset=164 [5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5 [6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none) [7] VOLATILE (anon) type_id=3 Issue #2 and solution: ====================== Current forward type in BTF does not specify whether the original type is struct or union. This will not work for type pretty print and BTF-to-header-file conversion as struct/union must be specified. $ cat tt.c struct t; union u; int foo(struct t *t, union u *u) { return 0; } $ gcc -c -g -O2 tt.c $ pahole -JV tt.o [1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [2] FWD t type_id=0 [3] PTR (anon) type_id=2 [4] FWD u type_id=0 [5] PTR (anon) type_id=4 To fix this issue, similar to issue #1, type->info bit 31 is used. If the bit is set, it is union type. Otherwise, it is a struct type. $ pahole -JV tt.o [1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [2] FWD t kind_flag=0 type_id=0 [3] PTR (anon) kind_flag=0 type_id=2 [4] FWD u kind_flag=1 type_id=0 [5] PTR (anon) kind_flag=0 type_id=4 Pahole/LLVM change: =================== The new kind_flag functionality has been implemented in pahole and llvm: https://github.com/yonghong-song/pahole/tree/bitfield https://github.com/yonghong-song/llvm/tree/bitfield Note that pahole hasn't implemented func/func_proto kind and .BTF.ext. So to print function signature with bpftool, the llvm compiler should be used. Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)") Acked-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/uapi/linux/btf.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 14f66948fc95..7b7475ef2f17 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -34,7 +34,9 @@ struct btf_type { * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-31: unused + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. @@ -52,6 +54,7 @@ struct btf_type { #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) #define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_INT 1 /* Integer */ @@ -110,9 +113,22 @@ struct btf_array { struct btf_member { __u32 name_off; __u32 type; - __u32 offset; /* offset in bits */ + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; }; +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + /* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". * The exact number of btf_param is stored in the vlen (of the * info in "struct btf_type"). -- cgit v1.2.3 From 30db641ef4f68054db9b191b6c0200fb1a96d458 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 15 Dec 2018 11:03:23 +0200 Subject: cfg80211: clarify LCI/civic location documentation The older code and current userspace assumed that this data is the content of the Measurement Report element, starting with the Measurement Token. Clarify this in the documentation. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2b53c0e949c7..4625a8624ba2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5893,9 +5893,11 @@ enum nl80211_external_auth_action { * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element - * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10) + * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10), + * i.e. starting with the measurement token * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element - * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13) + * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13), + * i.e. starting with the measurement token * @__NL80211_FTM_RESP_ATTR_LAST: Internal * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute. */ @@ -6295,9 +6297,15 @@ enum nl80211_peer_measurement_ftm_failure_reasons { * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note * that standard deviation is the square root of variance, optional) * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) - * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 8. * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer - * (binary, optional) + * (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 11. * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only * * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal -- cgit v1.2.3 From 30c63115e20b70f89b7cfb66b35e2a0ef4b0ef07 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 4 Dec 2018 17:46:52 +0530 Subject: nl80211: Add support to notify radar event info received from STA Currently radar detection and corresponding channel switch is handled at the AP device. STA ignores these detected radar events since the radar signal can be seen mostly by the AP as well. But in scenarios where a radar signal is seen only at STA, notifying this event to the AP which can trigger a channel switch can be useful. Stations can report such radar events autonomously through Spectrum management (Measurement Report) action frame to its AP. The userspace on processing the report can notify the kernel with the use of the added NL80211_CMD_NOTIFY_RADAR to indicate the detected event and inturn adding the reported channel to NOL. Signed-off-by: Sriram R Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4625a8624ba2..31ae5c7f10e3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1060,6 +1060,11 @@ * the measurement completed, using the measurement cookie * (%NL80211_ATTR_COOKIE). * + * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was + * detected and reported by a neighboring device on the channel + * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes + * determining the width and type. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1278,6 +1283,8 @@ enum nl80211_commands { NL80211_CMD_PEER_MEASUREMENT_RESULT, NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + NL80211_CMD_NOTIFY_RADAR, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 3bdbd0228e7555ec745e08469b98e5a0966409d6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 16 Dec 2018 15:47:04 -0800 Subject: bpf: sockmap, metadata support for reporting size of msg This adds metadata to sk_msg_md for BPF programs to read the sk_msg size. When the SK_MSG program is running under an application that is using sendfile the data is not copied into sk_msg buffers by default. Rather the BPF program uses sk_msg_pull_data to read the bytes in. This avoids doing the costly memcopy instructions when they are not in fact needed. However, if we don't know the size of the sk_msg we have to guess if needed bytes are available by doing a pull request which may fail. By including the size of the sk_msg BPF programs can check the size before issuing sk_msg_pull_data requests. Additionally, the same applies for sendmsg calls when the application provides multiple iovs. Here the BPF program needs to pull in data to update data pointers but its not clear where the data ends without a size parameter. In many cases "guessing" is not easy to do and results in multiple calls to pull and without bounded loops everything gets fairly tricky. Clean this up by including a u32 size field. Note, all writes into sk_msg_md are rejected already from sk_msg_is_valid_access so nothing additional is needed there. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d324c2cbca2..91c43884f295 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2665,6 +2665,7 @@ struct sk_msg_md { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ }; struct sk_reuseport_md { -- cgit v1.2.3