From 5037a00992e5fcb3d8509964313565a3dab6697c Mon Sep 17 00:00:00 2001
From: Sunil Dutt <usdutt@qti.qualcomm.com>
Date: Thu, 25 Jan 2018 17:13:37 +0200
Subject: nl80211: Introduce scan flags to emphasize requested scan behavior

This commit defines new scan flags (LOW_SPAN, LOW_POWER, HIGH_LATENCY)
to emphasize the requested scan behavior for the driver. These flags
are optional and are mutually exclusive. The implementation of the
respective functionality can be driver/hardware specific.

These flags can be used to control the compromise between how long
a scan takes, how much power it uses, and high accurate/complete
the scan is in finding the BSSs.

Signed-off-by: Sunil Dutt <usdutt@qti.qualcomm.com>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c587a61c32bf..6f60503fa617 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4945,6 +4945,9 @@ enum nl80211_feature_flags {
  *	probe request tx deferral and suppression
  * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
  *	value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4972,6 +4975,9 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
 	NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
 	NL80211_EXT_FEATURE_MFP_OPTIONAL,
+	NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+	NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+	NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -5032,6 +5038,10 @@ enum nl80211_timeout_reason {
  * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
  * requests.
  *
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only
+ * one of them can be used in the request.
+ *
  * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
  * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
  * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
@@ -5059,7 +5069,20 @@ enum nl80211_timeout_reason {
  *	and suppression (if it has received a broadcast Probe Response frame,
  *	Beacon frame or FILS Discovery frame from an AP that the STA considers
  *	a suitable candidate for (re-)association - suitable in terms of
- *	SSID and/or RSSI
+ *	SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ *	accomplish the scan. Thus, this flag intends the driver to perform the
+ *	scan request with lesser span/duration. It is specific to the driver
+ *	implementations on how this is accomplished. Scan accuracy may get
+ *	impacted with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume
+ *	optimal possible power. Drivers can resort to their specific means to
+ *	optimize the power. Scan accuracy may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan
+ *	results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum
+ *	possible scan results. This flag hints the driver to use the best
+ *	possible scan configuration to improve the accuracy in scanning.
+ *	Latency and power use may get impacted with this flag.
  */
 enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_PRIORITY				= 1<<0,
@@ -5070,6 +5093,9 @@ enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP		= 1<<5,
 	NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE		= 1<<6,
 	NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION	= 1<<7,
+	NL80211_SCAN_FLAG_LOW_SPAN				= 1<<8,
+	NL80211_SCAN_FLAG_LOW_POWER				= 1<<9,
+	NL80211_SCAN_FLAG_HIGH_ACCURACY				= 1<<10,
 };
 
 /**
-- 
cgit v1.2.3


From 40cbfa90218bc570a7959b436b9d48a18c361041 Mon Sep 17 00:00:00 2001
From: Srinivas Dasari <dasaris@qti.qualcomm.com>
Date: Thu, 25 Jan 2018 17:13:38 +0200
Subject: cfg80211/nl80211: Optional authentication offload to userspace

This interface allows the host driver to offload the authentication to
user space. This is exclusively defined for host drivers that do not
define separate commands for authentication and association, but rely on
userspace SME (e.g., in wpa_supplicant for the ~WPA_DRIVER_FLAGS_SME
case) for the authentication to happen. This can be used to implement
SAE without full implementation in the kernel/firmware while still being
able to use NL80211_CMD_CONNECT with driver-based BSS selection.

Host driver sends NL80211_CMD_EXTERNAL_AUTH event to start/abort
authentication to the port on which connect is triggered and status
of authentication is further indicated by user space to host
driver through the same command response interface.

User space entities advertise this capability through the
NL80211_ATTR_EXTERNAL_AUTH_SUPP flag in the NL80211_CMD_CONNECT request.
Host drivers shall look at this capability to offload the authentication.

Signed-off-by: Srinivas Dasari <dasaris@qti.qualcomm.com>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
[add socket connection ownership check]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 47 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6f60503fa617..c2342456cf16 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -992,6 +992,27 @@
  *
  * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
  *
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ *	drivers that do not define separate commands for authentication and
+ *	association, but rely on user space for the authentication to happen.
+ *	This interface acts both as the event request (driver to user space)
+ *	to trigger the authentication and command response (userspace to
+ *	driver) to indicate the authentication status.
+ *
+ *	User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ *	trigger a connection. The host driver selects a BSS and further uses
+ *	this interface to offload only the authentication part to the user
+ *	space. Authentication frames are passed between the driver and user
+ *	space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ *	further with the association after getting successful authentication
+ *	status. User space indicates the authentication status through
+ *	%NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ *	command interface.
+ *
+ *	Host driver reports this status on an authentication failure to the
+ *	user space through the connect result as the user space would have
+ *	initiated the connection through the connect request.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1198,6 +1219,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_RELOAD_REGDB,
 
+	NL80211_CMD_EXTERNAL_AUTH,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2153,6 +2176,16 @@ enum nl80211_commands {
  * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
  * @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
  *
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ *     authentication operation (u32 attribute with an
+ *     &enum nl80211_external_auth_action value). This is used with the
+ *     &NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ *     space supports external authentication. This attribute shall be used
+ *     only with %NL80211_CMD_CONNECT request. The driver may offload
+ *     authentication processing to user space if this capability is indicated
+ *     in NL80211_CMD_CONNECT requests from the user space.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2579,6 +2612,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_PMKR0_NAME,
 	NL80211_ATTR_PORT_AUTHORIZED,
 
+	NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+	NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5495,4 +5531,15 @@ enum nl80211_nan_match_attributes {
 	NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
 };
 
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ *     authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+	NL80211_EXTERNAL_AUTH_START,
+	NL80211_EXTERNAL_AUTH_ABORT,
+};
+
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From 466b9936bf93b7ec3bce1dcd493262ff0a8a4f44 Mon Sep 17 00:00:00 2001
From: "tamizhr@codeaurora.org" <tamizhr@codeaurora.org>
Date: Wed, 31 Jan 2018 16:24:49 +0530
Subject: cfg80211: Add support to notify station's opmode change to userspace

ht/vht action frames will be sent to AP from station to notify
change of its ht/vht opmode(max bandwidth, smps mode or nss) modified
values. Currently these valuse used by driver/firmware for rate control
algorithm. This patch introduces NL80211_CMD_STA_OPMODE_CHANGED
command to notify those modified/current supported values(max bandwidth,
smps mode, max nss) to userspace application. This will be useful for the
application like steering, which closely monitoring station's capability
changes. Since the application has taken these values during station
association.

Signed-off-by: Tamizh chelvam <tamizhr@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c2342456cf16..ca3d5a613fc0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1013,6 +1013,11 @@
  *	user space through the connect result as the user space would have
  *	initiated the connection through the connect request.
  *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's
+ *	ht opmode or vht opmode changes using any of &NL80211_ATTR_SMPS_MODE,
+ *	&NL80211_ATTR_CHANNEL_WIDTH,&NL80211_ATTR_NSS attributes with its
+ *	address(specified in &NL80211_ATTR_MAC).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1221,6 +1226,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_EXTERNAL_AUTH,
 
+	NL80211_CMD_STA_OPMODE_CHANGED,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2186,6 +2193,9 @@ enum nl80211_commands {
  *     authentication processing to user space if this capability is indicated
  *     in NL80211_CMD_CONNECT requests from the user space.
  *
+ * @NL80211_ATTR_NSS: Station's New/updated  RX_NSS value notified using this
+ *	u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2615,6 +2625,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_EXTERNAL_AUTH_ACTION,
 	NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
 
+	NL80211_ATTR_NSS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From 01883eda72bd3f0a6c81447e4f223de14033fd9d Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:35 -0800
Subject: rds: support for zcopy completion notification

RDS removes a datagram (rds_message) from the retransmit queue when
an ACK is received. The ACK indicates that the receiver has queued
the RDS datagram, so that the sender can safely forget the datagram.
When all references to the rds_message are quiesced, rds_message_purge
is called to release resources used by the rds_message

If the datagram to be removed had pinned pages set up, add
an entry to the rs->rs_znotify_queue so that the notifcation
will be sent up via rds_rm_zerocopy_callback() when the
rds_message is eventually freed by rds_message_purge.

rds_rm_zerocopy_callback() attempts to batch the number of cookies
sent with each notification  to a max of SO_EE_ORIGIN_MAX_ZCOOKIES.
This is achieved by checking the tail skb in the sk_error_queue:
if this has room for one more cookie, the cookie from the
current notification is added; else a new skb is added to the
sk_error_queue. Every invocation of rds_rm_zerocopy_callback() will
trigger a ->sk_error_report to notify the application.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/errqueue.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index dc64cfaf13da..28812eda4209 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,11 +20,13 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
 #define SO_EE_ORIGIN_ZEROCOPY	5
+#define SO_EE_ORIGIN_ZCOOKIE	6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED	1
+#define	SO_EE_ORIGIN_MAX_ZCOOKIES	8
 
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
-- 
cgit v1.2.3


From 0cebaccef3acbdfbc2d85880a2efb765d2f4e2e3 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:36 -0800
Subject: rds: zerocopy Tx support.

If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
application pages sent down with rds_sendmsg() are pinned.

The pinning uses the accounting infrastructure added by
Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")

The payload bytes in the message may not be modified for the
duration that the message has been pinned. A multi-threaded
application using this infrastructure may thus need to be notified
about send-completion so that it can free/reuse the buffers
passed to rds_sendmsg(). Notification of send-completion will
identify each message-buffer by a cookie that the application
must specify as ancillary data to rds_sendmsg().
The ancillary data in this case has cmsg_level == SOL_RDS
and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rds.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index e71d4491f225..12e3bca32cad 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -103,6 +103,7 @@
 #define RDS_CMSG_MASKED_ATOMIC_FADD	8
 #define RDS_CMSG_MASKED_ATOMIC_CSWP	9
 #define RDS_CMSG_RXPATH_LATENCY		11
+#define	RDS_CMSG_ZCOPY_COOKIE		12
 
 #define RDS_INFO_FIRST			10000
 #define RDS_INFO_COUNTERS		10000
-- 
cgit v1.2.3


From 1ec010e705934c8acbe7dbf31afc81e60e3d828b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 16 Feb 2018 11:03:07 +0100
Subject: tun: export flags, uid, gid, queue information over netlink

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6d9447700e18..11d0c0ea2bfa 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -941,4 +941,22 @@ enum {
 	IFLA_EVENT_BONDING_OPTIONS,	/* change in bonding options */
 };
 
+/* tun section */
+
+enum {
+	IFLA_TUN_UNSPEC,
+	IFLA_TUN_OWNER,
+	IFLA_TUN_GROUP,
+	IFLA_TUN_TYPE,
+	IFLA_TUN_PI,
+	IFLA_TUN_VNET_HDR,
+	IFLA_TUN_PERSIST,
+	IFLA_TUN_MULTI_QUEUE,
+	IFLA_TUN_NUM_QUEUES,
+	IFLA_TUN_NUM_DISABLED_QUEUES,
+	__IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From c4b50cd31d25c3d17886ffc47ca4a9a12c6dc9bf Mon Sep 17 00:00:00 2001
From: Venkateswara Naralasetty <vnaralas@codeaurora.org>
Date: Tue, 13 Feb 2018 11:03:06 +0530
Subject: cfg80211: send ack_signal to user in probe client response

This patch provides support to get ack signal in probe client response
and in station info from user.

Signed-off-by: Venkateswara Naralasetty <vnaralas@codeaurora.org>
[squash in compilation fixes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ca3d5a613fc0..c13c84304be3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2626,6 +2626,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
 
 	NL80211_ATTR_NSS,
+	NL80211_ATTR_ACK_SIGNAL,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -2947,6 +2948,7 @@ enum nl80211_sta_bss_param {
  * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
  *	received from the station (u64, usec)
  * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2985,6 +2987,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_TID_STATS,
 	NL80211_STA_INFO_RX_DURATION,
 	NL80211_STA_INFO_PAD,
+	NL80211_STA_INFO_ACK_SIGNAL,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
-- 
cgit v1.2.3


From ccc007e4a746bb592d3e72106f00241f81d51410 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Thu, 15 Feb 2018 19:42:43 +0200
Subject: net: sched: add em_ipt ematch for calling xtables matches

The commit a new tc ematch for using netfilter xtable matches.

This allows early classification as well as mirroning/redirecting traffic
based on logic implemented in netfilter extensions.

Current supported use case is classification based on the incoming IPSec
state used during decpsulation using the 'policy' iptables extension
(xt_policy).

The module dynamically fetches the netfilter match module and calls
it using a fake xt_action_param structure based on validated userspace
provided parameters.

As the xt_policy match does not access skb->data, no skb modifications
are needed on match.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h             |  3 ++-
 include/uapi/linux/tc_ematch/tc_em_ipt.h | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/tc_ematch/tc_em_ipt.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 46c506615f4a..7cafb26df555 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -555,7 +555,8 @@ enum {
 #define	TCF_EM_VLAN		6
 #define	TCF_EM_CANID		7
 #define	TCF_EM_IPSET		8
-#define	TCF_EM_MAX		8
+#define	TCF_EM_IPT		9
+#define	TCF_EM_MAX		9
 
 enum {
 	TCF_EM_PROG_TC
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644
index 000000000000..49a65530992c
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+	TCA_EM_IPT_UNSPEC,
+	TCA_EM_IPT_HOOK,
+	TCA_EM_IPT_MATCH_NAME,
+	TCA_EM_IPT_MATCH_REVISION,
+	TCA_EM_IPT_NFPROTO,
+	TCA_EM_IPT_MATCH_DATA,
+	__TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
-- 
cgit v1.2.3


From cac56209a66ea3b0be67aa2966b2c628b944da1e Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@cumulusnetworks.com>
Date: Tue, 20 Feb 2018 08:55:58 -0500
Subject: net: Allow a rule to track originating protocol

Allow a rule that is being added/deleted/modified or
dumped to contain the originating protocol's id.

The protocol is handled just like a routes originating
protocol is.  This is especially useful because there
is starting to be a plethora of different user space
programs adding rules.

Allow the vrf device to specify that the kernel is the originator
of the rule created for this device.

Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fib_rules.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..925539172d5b 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
 	__u8		tos;
 
 	__u8		table;
+	__u8		proto;
 	__u8		res1;	/* reserved */
-	__u8		res2;	/* reserved */
 	__u8		action;
 
 	__u32		flags;
-- 
cgit v1.2.3


From 4fe0de5b143762d327bfaf1d7be7c5b58041a18c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Wed, 31 Jan 2018 19:04:15 -0600
Subject: uapi: Add 802.11 Preauthentication to if_ether

This adds 0x88c7 protocol type to if_ether.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index f8cb5760ea4f..54585906e50a 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -89,6 +89,7 @@
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
 #define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
+#define ETH_P_PREAUTH	0x88C7		/* 802.11 Preauthentication */
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_MACSEC	0x88E5		/* 802.1ae MACsec */
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */
-- 
cgit v1.2.3


From 1b71af6053af1bd2f849e9fda4f71c1e3f145dcf Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@cumulusnetworks.com>
Date: Fri, 23 Feb 2018 14:01:52 -0500
Subject: net: fib_rules: Add new attribute to set protocol

For ages iproute2 has used `struct rtmsg` as the ancillary header for
FIB rules and in the process set the protocol value to RTPROT_BOOT.
Until ca56209a66 ("net: Allow a rule to track originating protocol")
the kernel rules code ignored the protocol value sent from userspace
and always returned 0 in notifications. To avoid incompatibility with
existing iproute2, send the protocol as a new attribute.

Fixes: cac56209a66 ("net: Allow a rule to track originating protocol")
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fib_rules.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 925539172d5b..77d90ae38114 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
 	__u8		tos;
 
 	__u8		table;
-	__u8		proto;
-	__u8		res1;	/* reserved */
+	__u8		res1;   /* reserved */
+	__u8		res2;	/* reserved */
 	__u8		action;
 
 	__u32		flags;
@@ -58,6 +58,7 @@ enum {
 	FRA_PAD,
 	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
 	FRA_UID_RANGE,	/* UID range */
+	FRA_PROTOCOL,   /* Originator of the rule */
 	__FRA_MAX
 };
 
-- 
cgit v1.2.3


From 6b1aea8cf2c8618146edaf6b35775ab55f7cafe5 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 1 Jan 2018 00:00:00 +0100
Subject: batman-adv: Update copyright years for 2018

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h | 2 +-
 include/uapi/linux/batman_adv.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 5cb360be2a11..daefd7283896 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index ae00c99cbed0..56ae28934070 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: MIT */
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
-- 
cgit v1.2.3


From a163dc22d515d17844435c8217ff66193d35b3fa Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Wed, 24 Jan 2018 12:21:37 +0100
Subject: batman-adv: always assume 2-byte packet alignment

NIC drivers generally try to ensure that the "network header" is aligned
to a 4-byte boundary. This is not always possible: When Ethernet frames are
encapsulated in other packets with 4-byte aligned headers, the inner
Ethernet header will have 4-byte alignment, and in consequence, the inner
network header is aligned to 2, but not to 4 bytes.

Most parts of batman-adv only care about 2-byte alignment; in particular,
no unaligned accesses occur in performance-critical paths that handle
actual payload data. This is not true for OGM handling: the seqno and crc
fields are accessed as 32-bit values. To avoid these unaligned accesses,
this patch reduces the expected packet alignment to 2 bytes for all of
batadv's packet types.

As no unaligned accesses existed on the performance-critical paths anyways,
this chance does have any (positive or negative) effect on performance, but
it still makes sense to avoid these accesses to prevent log noise when
examining other unaligned accesses in the kernel while batman-adv is
active.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index daefd7283896..894d8d2f713d 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -196,8 +196,6 @@ struct batadv_bla_claim_dst {
 	__be16 group;		/* group id */
 };
 
-#pragma pack()
-
 /**
  * struct batadv_ogm_packet - ogm (routing protocol) packet
  * @packet_type: batman-adv packet type, part of the general header
@@ -222,9 +220,6 @@ struct batadv_ogm_packet {
 	__u8   reserved;
 	__u8   tq;
 	__be16 tvlv_len;
-	/* __packed is not needed as the struct size is divisible by 4,
-	 * and the largest data type in this struct has a size of 4.
-	 */
 };
 
 #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
@@ -249,9 +244,6 @@ struct batadv_ogm2_packet {
 	__u8   orig[ETH_ALEN];
 	__be16 tvlv_len;
 	__be32 throughput;
-	/* __packed is not needed as the struct size is divisible by 4,
-	 * and the largest data type in this struct has a size of 4.
-	 */
 };
 
 #define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
@@ -405,7 +397,6 @@ struct batadv_icmp_packet_rr {
  * misalignment of the payload after the ethernet header. It may also lead to
  * leakage of information when the padding it not initialized before sending.
  */
-#pragma pack(2)
 
 /**
  * struct batadv_unicast_packet - unicast packet for network payload
@@ -533,8 +524,6 @@ struct batadv_coded_packet {
 	__be16 coded_len;
 };
 
-#pragma pack()
-
 /**
  * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
  * @packet_type: batman-adv packet type, part of the general header
@@ -641,4 +630,6 @@ struct batadv_tvlv_mcast_data {
 	__u8 reserved[3];
 };
 
+#pragma pack()
+
 #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */
-- 
cgit v1.2.3


From 401910db4cd425899832a093539222b6174f92a2 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 27 Feb 2018 09:52:43 -0800
Subject: rds: deliver zerocopy completion notification with data

This commit is an optimization over commit 01883eda72bd
("rds: support for zcopy completion notification") for PF_RDS sockets.

RDS applications are predominantly request-response transactions, so
it is more efficient to reduce the number of system calls and have
zerocopy completion notification delivered as ancillary data on the
POLLIN channel.

Cookies are passed up as ancillary data (at level SOL_RDS) in a
struct rds_zcopy_cookies when the returned value of recvmsg() is
greater than, or equal to, 0. A max of RDS_MAX_ZCOOKIES may be passed
with each message.

This commit removes support for zerocopy completion notification on
MSG_ERRQUEUE for PF_RDS sockets.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/errqueue.h | 2 --
 include/uapi/linux/rds.h      | 7 +++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 28812eda4209..dc64cfaf13da 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,13 +20,11 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
 #define SO_EE_ORIGIN_ZEROCOPY	5
-#define SO_EE_ORIGIN_ZCOOKIE	6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED	1
-#define	SO_EE_ORIGIN_MAX_ZCOOKIES	8
 
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 12e3bca32cad..a66b213de3d7 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -104,6 +104,7 @@
 #define RDS_CMSG_MASKED_ATOMIC_CSWP	9
 #define RDS_CMSG_RXPATH_LATENCY		11
 #define	RDS_CMSG_ZCOPY_COOKIE		12
+#define	RDS_CMSG_ZCOPY_COMPLETION	13
 
 #define RDS_INFO_FIRST			10000
 #define RDS_INFO_COUNTERS		10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
 #define RDS_RDMA_DROPPED	3
 #define RDS_RDMA_OTHER_ERROR	4
 
+#define	RDS_MAX_ZCOOKIES	8
+struct rds_zcopy_cookies {
+	__u32 num;
+	__u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
 /*
  * Common set of flags for all RDMA related structs
  */
-- 
cgit v1.2.3


From bfff4862653bb96001ab57c1edd6d03f48e5f035 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 28 Feb 2018 22:40:16 -0500
Subject: net: fib_rules: support for match on ip_proto, sport and dport

uapi for ip_proto, sport and dport range match
in fib rules.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fib_rules.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 77d90ae38114..232df14e1287 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
 	__u32		end;
 };
 
+struct fib_rule_port_range {
+	__u16		start;
+	__u16		end;
+};
+
 enum {
 	FRA_UNSPEC,
 	FRA_DST,	/* destination address */
@@ -59,6 +64,9 @@ enum {
 	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
 	FRA_UID_RANGE,	/* UID range */
 	FRA_PROTOCOL,   /* Originator of the rule */
+	FRA_IP_PROTO,	/* ip proto */
+	FRA_SPORT_RANGE, /* sport */
+	FRA_DPORT_RANGE, /* dport */
 	__FRA_MAX
 };
 
-- 
cgit v1.2.3


From 77a5196a804e34ce5e215ef84d5e1de332e9c529 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Thu, 1 Mar 2018 13:49:57 -0800
Subject: gre: add sequence number for collect md mode.

Currently GRE sequence number can only be used in native
tunnel mode.  This patch adds sequence number support for
gre collect metadata mode.  RFC2890 defines GRE sequence
number to be specific to the traffic flow identified by the
key.  However, this patch does not implement per-key seqno.
The sequence number is shared in the same tunnel device.
That is, different tunnel keys using the same collect_md
tunnel share single sequence number.

Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db6bdc375126..2a66769e5875 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -800,6 +800,7 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
+#define BPF_F_SEQ_NUMBER		(1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
-- 
cgit v1.2.3


From 87ecc95d81d951b0984f2eb9c5c118cb68d0dce8 Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Sun, 4 Mar 2018 10:38:35 -0800
Subject: tcp: add send queue size stat in SCM_TIMESTAMPING_OPT_STATS

This patch adds TCP_NLA_SENDQ_SIZE stat into SCM_TIMESTAMPING_OPT_STATS.
It reports no. of bytes present in send queue, when timestamp is
generated.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64635fa..93bad2128ef6 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -241,6 +241,7 @@ enum {
 	TCP_NLA_MIN_RTT,        /* minimum RTT */
 	TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
 	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
 
 };
 
-- 
cgit v1.2.3


From be631892948060f44b1ceee3132be1266932071e Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Sun, 4 Mar 2018 10:38:36 -0800
Subject: tcp: add ca_state stat in SCM_TIMESTAMPING_OPT_STATS

This patch adds TCP_NLA_CA_STATE stat into SCM_TIMESTAMPING_OPT_STATS.
It reports ca_state of socket, when timestamp is generated.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 93bad2128ef6..4c0ae0faf7ca 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -242,6 +242,7 @@ enum {
 	TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
 	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
 	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
+	TCP_NLA_CA_STATE,	/* ca_state of socket */
 
 };
 
-- 
cgit v1.2.3


From 955dc68cb9b23b42999cafe6df3684309bc686c6 Mon Sep 17 00:00:00 2001
From: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Date: Mon, 5 Mar 2018 11:39:05 +1100
Subject: net/ncsi: Add generic netlink family

Add a generic netlink family for NCSI. This supports three commands;
NCSI_CMD_PKG_INFO which returns information on packages and their
associated channels, NCSI_CMD_SET_INTERFACE which allows a specific
package or package/channel combination to be set as the preferred
choice, and NCSI_CMD_CLEAR_INTERFACE which clears any preferred setting.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ncsi.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 include/uapi/linux/ncsi.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
new file mode 100644
index 000000000000..4c292ecbb748
--- /dev/null
+++ b/include/uapi/linux/ncsi.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __UAPI_NCSI_NETLINK_H__
+#define __UAPI_NCSI_NETLINK_H__
+
+/**
+ * enum ncsi_nl_commands - supported NCSI commands
+ *
+ * @NCSI_CMD_UNSPEC: unspecified command to catch errors
+ * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires
+ *	NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified returns the
+ *	specific package and its channels - otherwise a dump request returns
+ *	all packages and their associated channels.
+ * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination.
+ *	Requires NCSI_ATTR_IFINDEX and the preferred NCSI_ATTR_PACKAGE_ID and
+ *	optionally the preferred NCSI_ATTR_CHANNEL_ID.
+ * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
+ *	Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_MAX: highest command number
+ */
+enum ncsi_nl_commands {
+	NCSI_CMD_UNSPEC,
+	NCSI_CMD_PKG_INFO,
+	NCSI_CMD_SET_INTERFACE,
+	NCSI_CMD_CLEAR_INTERFACE,
+
+	__NCSI_CMD_AFTER_LAST,
+	NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_attrs - General NCSI netlink attributes
+ *
+ * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI
+ * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
+ * @NCSI_ATTR_PACKAGE_ID: package ID
+ * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_attrs {
+	NCSI_ATTR_UNSPEC,
+	NCSI_ATTR_IFINDEX,
+	NCSI_ATTR_PACKAGE_LIST,
+	NCSI_ATTR_PACKAGE_ID,
+	NCSI_ATTR_CHANNEL_ID,
+
+	__NCSI_ATTR_AFTER_LAST,
+	NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes
+ *
+ * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_PKG_ATTR: nested array of package attributes
+ * @NCSI_PKG_ATTR_ID: package ID
+ * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred
+ * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes
+ * @NCSI_PKG_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_pkg_attrs {
+	NCSI_PKG_ATTR_UNSPEC,
+	NCSI_PKG_ATTR,
+	NCSI_PKG_ATTR_ID,
+	NCSI_PKG_ATTR_FORCED,
+	NCSI_PKG_ATTR_CHANNEL_LIST,
+
+	__NCSI_PKG_ATTR_AFTER_LAST,
+	NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes
+ *
+ * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_CHANNEL_ATTR: nested array of channel attributes
+ * @NCSI_CHANNEL_ATTR_ID: channel ID
+ * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number
+ * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number
+ * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string
+ * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags
+ * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in
+ *	NCSI_CHANNEL_ACTIVE state
+ * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as
+ *	preferred
+ * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs
+ * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel
+ * @NCSI_CHANNEL_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_channel_attrs {
+	NCSI_CHANNEL_ATTR_UNSPEC,
+	NCSI_CHANNEL_ATTR,
+	NCSI_CHANNEL_ATTR_ID,
+	NCSI_CHANNEL_ATTR_VERSION_MAJOR,
+	NCSI_CHANNEL_ATTR_VERSION_MINOR,
+	NCSI_CHANNEL_ATTR_VERSION_STR,
+	NCSI_CHANNEL_ATTR_LINK_STATE,
+	NCSI_CHANNEL_ATTR_ACTIVE,
+	NCSI_CHANNEL_ATTR_FORCED,
+	NCSI_CHANNEL_ATTR_VLAN_LIST,
+	NCSI_CHANNEL_ATTR_VLAN_ID,
+
+	__NCSI_CHANNEL_ATTR_AFTER_LAST,
+	NCSI_CHANNEL_ATTR_MAX = __NCSI_CHANNEL_ATTR_AFTER_LAST - 1
+};
+
+#endif /* __UAPI_NCSI_NETLINK_H__ */
-- 
cgit v1.2.3


From ed63afb8a318f6b3558d76afba7809daee4f28e5 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 5 Mar 2018 20:44:18 +0800
Subject: sctp: add support for PR-SCTP Information for sendmsg

This patch is to add support for PR-SCTP Information for sendmsg,
as described in section 5.3.7 of RFC6458.

With this option, you can specify pr_policy and pr_value for user
data in sendmsg.

It's also a necessary send info for sctp_sendv.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 4c4db14786bd..0dd1f82a4fa8 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -260,6 +260,19 @@ struct sctp_nxtinfo {
 	sctp_assoc_t nxt_assoc_id;
 };
 
+/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_PRINFO    struct sctp_prinfo
+ */
+struct sctp_prinfo {
+	__u16 pr_policy;
+	__u32 pr_value;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -293,6 +306,8 @@ typedef enum sctp_cmsg_type {
 #define SCTP_RCVINFO	SCTP_RCVINFO
 	SCTP_NXTINFO,		/* 5.3.6 SCTP Next Receive Information Structure */
 #define SCTP_NXTINFO	SCTP_NXTINFO
+	SCTP_PRINFO,		/* 5.3.7 SCTP PR-SCTP Information Structure */
+#define SCTP_PRINFO	SCTP_PRINFO
 } sctp_cmsg_t;
 
 /*
-- 
cgit v1.2.3


From 2c0dbaa0c43d04d8d6daf52adb724c5789676b15 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 5 Mar 2018 20:44:19 +0800
Subject: sctp: add support for SCTP_DSTADDRV4/6 Information for sendmsg

This patch is to add support for Destination IPv4/6 Address options
for sendmsg, as described in section 5.3.9/10 of RFC6458.

With this option, you can provide more than one destination addrs
to sendmsg when creating asoc, like sctp_connectx.

It's also a necessary send info for sctp_sendv.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 0dd1f82a4fa8..a1bc35098033 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -308,6 +308,12 @@ typedef enum sctp_cmsg_type {
 #define SCTP_NXTINFO	SCTP_NXTINFO
 	SCTP_PRINFO,		/* 5.3.7 SCTP PR-SCTP Information Structure */
 #define SCTP_PRINFO	SCTP_PRINFO
+	SCTP_AUTHINFO,		/* 5.3.8 SCTP AUTH Information Structure (RESERVED) */
+#define SCTP_AUTHINFO	SCTP_AUTHINFO
+	SCTP_DSTADDRV4,		/* 5.3.9 SCTP Destination IPv4 Address Structure */
+#define SCTP_DSTADDRV4	SCTP_DSTADDRV4
+	SCTP_DSTADDRV6,		/* 5.3.10 SCTP Destination IPv6 Address Structure */
+#define SCTP_DSTADDRV6	SCTP_DSTADDRV6
 } sctp_cmsg_t;
 
 /*
-- 
cgit v1.2.3


From 4910280503f3af2857d5aa77e35b22d93a8960a8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 5 Mar 2018 20:44:20 +0800
Subject: sctp: add support for snd flag SCTP_SENDALL process in sendmsg

This patch is to add support for snd flag SCTP_SENDALL process
in sendmsg, as described in section 5.3.4 of RFC6458.

With this flag, you can send the same data to all the asocs of
this sk once.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index a1bc35098033..e94b6d297ad9 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -284,6 +284,8 @@ enum sctp_sinfo_flags {
 	SCTP_ADDR_OVER		= (1 << 1), /* Override the primary destination. */
 	SCTP_ABORT		= (1 << 2), /* Send an ABORT message to the peer. */
 	SCTP_SACK_IMMEDIATELY	= (1 << 3), /* SACK should be sent without delay. */
+	/* 2 bits here have been used by SCTP_PR_SCTP_MASK */
+	SCTP_SENDALL		= (1 << 6),
 	SCTP_NOTIFICATION	= MSG_NOTIFICATION, /* Next message is not user msg but notification. */
 	SCTP_EOF		= MSG_FIN,  /* Initiate graceful shutdown process. */
 };
-- 
cgit v1.2.3


From 95da0cdb723260362fc126a563285ac66a193da7 Mon Sep 17 00:00:00 2001
From: Teng Qin <qinteng@fb.com>
Date: Tue, 6 Mar 2018 10:55:01 -0800
Subject: bpf: add support to read sample address in bpf program

This commit adds new field "addr" to bpf_perf_event_data which could be
read and used by bpf programs attached to perf events. The value of the
field is copied from bpf_perf_event_data_kern.addr and contains the
address value recorded by specifying sample_type with PERF_SAMPLE_ADDR
when calling perf_event_open.

Signed-off-by: Teng Qin <qinteng@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf_perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
 struct bpf_perf_event_data {
 	bpf_user_pt_regs_t regs;
 	__u64 sample_period;
+	__u64 addr;
 };
 
 #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
-- 
cgit v1.2.3


From 459d153d9916ea48b1550bbb6f2959dc03bff011 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Tue, 6 Mar 2018 18:11:14 +0100
Subject: net/sched: cls_flower: Add support to handle first frag as match
 field

Allow setting firstfrag as matching option in tc flower classifier.

 # tc filter add dev eth0 protocol ip parent ffff: \
     flower indev eth0 \
        ip_flags firstfrag
     action mirred egress redirect dev eth1

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7cafb26df555..be05e66c167b 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -475,6 +475,7 @@ enum {
 
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
 };
 
 /* Match-all classifier */
-- 
cgit v1.2.3


From 84a1d9c4820080bebcbd413a845076dcb62f45fa Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Thu, 8 Mar 2018 15:45:03 +0000
Subject: net: ethtool: extend RXNFC API to support RSS spreading of filter
 matches

We use a two-step process to configure a filter with RSS spreading.  First,
 the RSS context is allocated and configured using ETHTOOL_SRSSH; this
 returns an identifier (rss_context) which can then be passed to subsequent
 invocations of ETHTOOL_SRXCLSRLINS to specify that the offset from the RSS
 indirection table lookup should be added to the queue number (ring_cookie)
 when delivering the packet.  Drivers for devices which can only use the
 indirection table entry directly (not add it to a base queue number)
 should reject rule insertions combining RSS with a nonzero ring_cookie.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 44a0b675a6bc..20da156aaf64 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -914,12 +914,15 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
  * @data: Command-dependent value
  * @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
  * @rule_cnt: Number of rules to be affected
  * @rule_locs: Array of used rule locations
  *
  * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
  * the fields included in the flow hash, e.g. %RXH_IP_SRC.  The following
- * structure fields must not be used.
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
  *
  * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
  * on return.
@@ -931,7 +934,9 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * set in @data then special location values should not be used.
  *
  * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
- * existing rule on entry and @fs contains the rule on return.
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
  *
  * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
  * user buffer for @rule_locs on entry.  On return, @data is the size
@@ -942,7 +947,11 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
  * @fs.@location either specifies the location to use or is a special
  * location value with %RX_CLS_LOC_SPECIAL flag set.  On return,
- * @fs.@location is the actual rule location.
+ * @fs.@location is the actual rule location.  If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule.  The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
  *
  * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
  * existing rule on entry.
@@ -963,7 +972,10 @@ struct ethtool_rxnfc {
 	__u32				flow_type;
 	__u64				data;
 	struct ethtool_rx_flow_spec	fs;
-	__u32				rule_cnt;
+	union {
+		__u32			rule_cnt;
+		__u32			rss_context;
+	};
 	__u32				rule_locs[0];
 };
 
@@ -990,7 +1002,11 @@ struct ethtool_rxfh_indir {
 /**
  * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
  * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
- * @rss_context: RSS context identifier.
+ * @rss_context: RSS context identifier.  Context 0 is the default for normal
+ *	traffic; other contexts can be referenced as the destination for RX flow
+ *	classification rules.  %ETH_RXFH_CONTEXT_ALLOC is used with command
+ *	%ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ *	contain the ID of the newly allocated context.
  * @indir_size: On entry, the array size of the user buffer for the
  *	indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
  *	%ETH_RXFH_INDIR_NO_CHANGE.  On return from %ETHTOOL_GRSSH,
@@ -1009,7 +1025,8 @@ struct ethtool_rxfh_indir {
  * size should be returned.  For %ETHTOOL_SRSSH, an @indir_size of
  * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
  * and a @indir_size of zero means the indir table should be reset to default
- * values. An hfunc of zero means that hash function setting is not requested.
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
  */
 struct ethtool_rxfh {
 	__u32   cmd;
@@ -1021,6 +1038,7 @@ struct ethtool_rxfh {
 	__u32	rsvd32;
 	__u32   rss_config[0];
 };
+#define ETH_RXFH_CONTEXT_ALLOC		0xffffffff
 #define ETH_RXFH_INDIR_NO_CHANGE	0xffffffff
 
 /**
@@ -1635,6 +1653,8 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 /* Flag to enable additional fields in struct ethtool_rx_flow_spec */
 #define	FLOW_EXT	0x80000000
 #define	FLOW_MAC_EXT	0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define	FLOW_RSS	0x20000000
 
 /* L3-L4 network traffic flow hash options */
 #define	RXH_L2DA	(1 << 1)
-- 
cgit v1.2.3


From 41aeefcc38a2643a62db46818a70a781efb40d99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Tue, 13 Mar 2018 11:41:12 +0100
Subject: batman-adv: add DAT cache netlink support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dump the list of DAT cache entries via the netlink socket.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 56ae28934070..95ab5dbd09fa 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -272,6 +272,21 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_BLA_CRC,
 
+	/**
+	 * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address
+	 */
+	BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+
+	/**
+	 * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address
+	 */
+	BATADV_ATTR_DAT_CACHE_HWADDRESS,
+
+	/**
+	 * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID
+	 */
+	BATADV_ATTR_DAT_CACHE_VID,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
@@ -361,6 +376,11 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_GET_BLA_BACKBONE,
 
+	/**
+	 * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries
+	 */
+	BATADV_CMD_GET_DAT_CACHE,
+
 	/* add new commands above here */
 
 	/**
-- 
cgit v1.2.3


From 53dd9a68ba683986ec90497586f94b941bb748a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Tue, 13 Mar 2018 11:41:13 +0100
Subject: batman-adv: add multicast flags netlink support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dump the list of multicast flags entries via the netlink socket.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h | 62 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 95ab5dbd09fa..324a0e1143e7 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -91,6 +91,53 @@ enum batadv_tt_client_flags {
 	BATADV_TT_CLIENT_TEMP	 = (1 << 11),
 };
 
+/**
+ * enum batadv_mcast_flags_priv - Private, own multicast flags
+ *
+ * These are internal, multicast related flags. Currently they describe certain
+ * multicast related attributes of the segment this originator bridges into the
+ * mesh.
+ *
+ * Those attributes are used to determine the public multicast flags this
+ * originator is going to announce via TT.
+ *
+ * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier
+ * related flags are undefined.
+ */
+enum batadv_mcast_flags_priv {
+	/**
+	 * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh
+	 * interface.
+	 */
+	BATADV_MCAST_FLAGS_BRIDGED			= (1 << 0),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier
+	 * exists in the mesh
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS		= (1 << 1),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier
+	 * exists in the mesh
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS		= (1 << 2),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier
+	 * exists, whether it is potentially shadowing multicast listeners
+	 * (i.e. querier is behind our own bridge segment)
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING	= (1 << 3),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier
+	 * exists, whether it is potentially shadowing multicast listeners
+	 * (i.e. querier is behind our own bridge segment)
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING	= (1 << 4),
+};
+
 /**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  */
@@ -287,6 +334,16 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_DAT_CACHE_VID,
 
+	/**
+	 * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags
+	 */
+	BATADV_ATTR_MCAST_FLAGS,
+
+	/**
+	 * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags
+	 */
+	BATADV_ATTR_MCAST_FLAGS_PRIV,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
@@ -381,6 +438,11 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_GET_DAT_CACHE,
 
+	/**
+	 * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags
+	 */
+	BATADV_CMD_GET_MCAST_FLAGS,
+
 	/* add new commands above here */
 
 	/**
-- 
cgit v1.2.3


From 3ff547c06a7d75d72d37dae2c064fcf0672e56c0 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:31 +0800
Subject: sctp: add support for SCTP AUTH Information for sendmsg

This patch is to add support for SCTP AUTH Information for sendmsg,
as described in section 5.3.8 of RFC6458.

With this option, you can provide shared key identifier used for
sending the user message.

It's also a necessary send info for sctp_sendv.

Note that it reuses sinfo->sinfo_tsn to indicate if this option is
set and sinfo->sinfo_ssn to save the shkey ID which can be 0.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index e94b6d297ad9..47e781ebde05 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -273,6 +273,18 @@ struct sctp_prinfo {
 	__u32 pr_value;
 };
 
+/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_AUTHINFO  struct sctp_authinfo
+ */
+struct sctp_authinfo {
+	__u16 auth_keynumber;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -310,7 +322,7 @@ typedef enum sctp_cmsg_type {
 #define SCTP_NXTINFO	SCTP_NXTINFO
 	SCTP_PRINFO,		/* 5.3.7 SCTP PR-SCTP Information Structure */
 #define SCTP_PRINFO	SCTP_PRINFO
-	SCTP_AUTHINFO,		/* 5.3.8 SCTP AUTH Information Structure (RESERVED) */
+	SCTP_AUTHINFO,		/* 5.3.8 SCTP AUTH Information Structure */
 #define SCTP_AUTHINFO	SCTP_AUTHINFO
 	SCTP_DSTADDRV4,		/* 5.3.9 SCTP Destination IPv4 Address Structure */
 #define SCTP_DSTADDRV4	SCTP_DSTADDRV4
-- 
cgit v1.2.3


From 601590ec155aadf5daa17a6f63a06d1bba5b5ce9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:32 +0800
Subject: sctp: add sockopt SCTP_AUTH_DEACTIVATE_KEY

This patch is to add sockopt SCTP_AUTH_DEACTIVATE_KEY, as described in
section 8.3.4 of RFC6458.

This set option indicates that the application will no longer send user
messages using the indicated key identifier.

Note that RFC requires that only deactivated keys that are no longer used
by an association can be deleted, but for the backward compatibility, it
is not to check deactivated when deleting or replacing one sh_key.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 47e781ebde05..08fc313829f4 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -99,6 +99,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_RECVRCVINFO	32
 #define SCTP_RECVNXTINFO	33
 #define SCTP_DEFAULT_SNDINFO	34
+#define SCTP_AUTH_DEACTIVATE_KEY	35
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
-- 
cgit v1.2.3


From ec2e506c680deaa8e1a087986db6d73ba63a04be Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:33 +0800
Subject: sctp: add SCTP_AUTH_FREE_KEY type for AUTHENTICATION_EVENT

This patch is to add SCTP_AUTH_FREE_KEY type for AUTHENTICATION_EVENT,
as described in section 6.1.8 of RFC6458.

      SCTP_AUTH_FREE_KEY:  This report indicates that the SCTP
         implementation will no longer use the key identifier specified
         in auth_keynumber.

After deactivating a key, it would never be used again, which means
it's refcnt can't be held/increased by new chunks. But there may be
some chunks in out queue still using it. So only when refcnt is 1,
which means no chunk in outqueue is using/holding this key either,
this EVENT would be sent.

When users receive this notification, they could do DEL_KEY sockopt to
remove this shkey, and also tell the peer that this key won't be used
in any chunk thoroughly from now on, then the peer can remove it as
well safely.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 08fc313829f4..18ebbfeee4af 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -518,7 +518,11 @@ struct sctp_authkey_event {
 	sctp_assoc_t auth_assoc_id;
 };
 
-enum { SCTP_AUTH_NEWKEY = 0, };
+enum {
+	SCTP_AUTH_NEW_KEY,
+#define	SCTP_AUTH_NEWKEY	SCTP_AUTH_NEW_KEY /* compatible with before */
+	SCTP_AUTH_FREE_KEY,
+};
 
 /*
  * 6.1.9. SCTP_SENDER_DRY_EVENT
-- 
cgit v1.2.3


From 30f6ebf65bc46161c5aaff1db2e6e7c76aa4a06b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:34 +0800
Subject: sctp: add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT

This patch is to add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT,
as described in section 6.1.8 of RFC6458.

      SCTP_AUTH_NO_AUTH:  This report indicates that the peer does not
         support SCTP authentication as defined in [RFC4895].

Note that the implementation is quite similar as that of
SCTP_ADAPTATION_INDICATION.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 18ebbfeee4af..afd4346386e0 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -522,6 +522,7 @@ enum {
 	SCTP_AUTH_NEW_KEY,
 #define	SCTP_AUTH_NEWKEY	SCTP_AUTH_NEW_KEY /* compatible with before */
 	SCTP_AUTH_FREE_KEY,
+	SCTP_AUTH_NO_AUTH,
 };
 
 /*
-- 
cgit v1.2.3


From 615755a77b2461ed78dfafb8a6649456201949c7 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Mar 2018 10:23:21 -0700
Subject: bpf: extend stackmap to save binary_build_id+offset instead of
 address

Currently, bpf stackmap store address for each entry in the call trace.
To map these addresses to user space files, it is necessary to maintain
the mapping from these virtual address to symbols in the binary. Usually,
the user space profiler (such as perf) has to scan /proc/pid/maps at the
beginning of profiling, and monitor mmap2() calls afterwards. Given the
cost of maintaining the address map, this solution is not practical for
system wide profiling that is always on.

This patch tries to solve this problem with a variation of stackmap. This
variation is enabled by flag BPF_F_STACK_BUILD_ID. Instead of storing
addresses, the variation stores ELF file build_id + offset.

Build ID is a 20-byte unique identifier for ELF files. The following
command shows the Build ID of /bin/bash:

  [user@]$ readelf -n /bin/bash
  ...
    Build ID: XXXXXXXXXX
  ...

With BPF_F_STACK_BUILD_ID, bpf_get_stackid() tries to parse Build ID
for each entry in the call trace, and translate it into the following
struct:

  struct bpf_stack_build_id_offset {
          __s32           status;
          unsigned char   build_id[BPF_BUILD_ID_SIZE];
          union {
                  __u64   offset;
                  __u64   ip;
          };
  };

The search of build_id is limited to the first page of the file, and this
page should be in page cache. Otherwise, we fallback to store ip for this
entry (ip field in struct bpf_stack_build_id_offset). This requires the
build_id to be stored in the first page. A quick survey of binary and
dynamic library files in a few different systems shows that almost all
binary and dynamic library files have build_id in the first page.

Build_id is only meaningful for user stack. If a kernel stack is added to
a stackmap with BPF_F_STACK_BUILD_ID, it will automatically fallback to
only store ip (status == BPF_STACK_BUILD_ID_IP). Similarly, if build_id
lookup failed for some reason, it will also fallback to store ip.

User space can access struct bpf_stack_build_id_offset with bpf
syscall BPF_MAP_LOOKUP_ELEM. It is necessary for user space to
maintain mapping from build id to binary files. This mostly static
mapping is much easier to maintain than per process address maps.

Note: Stackmap with build_id only works in non-nmi context at this time.
This is because we need to take mm->mmap_sem for find_vma(). If this
changes, we would like to allow build_id lookup in nmi context.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a66769e5875..1e15d1724d89 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -231,6 +231,28 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY		(1U << 3)
 #define BPF_F_WRONLY		(1U << 4)
 
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID	(1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space need an empty entry to identify end of a trace */
+	BPF_STACK_BUILD_ID_EMPTY = 0,
+	/* with valid build_id and offset */
+	BPF_STACK_BUILD_ID_VALID = 1,
+	/* couldn't get build_id, fallback to ip */
+	BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+	__s32		status;
+	unsigned char	build_id[BPF_BUILD_ID_SIZE];
+	union {
+		__u64	offset;
+		__u64	ip;
+	};
+};
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
-- 
cgit v1.2.3


From 7156d194a0772f733865267e7207e0b08f81b02b Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Fri, 16 Mar 2018 10:51:07 -0700
Subject: tcp: add snd_ssthresh stat in SCM_TIMESTAMPING_OPT_STATS

This patch adds TCP_NLA_SND_SSTHRESH stat into SCM_TIMESTAMPING_OPT_STATS
that reports tcp_sock.snd_ssthresh.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 4c0ae0faf7ca..560374c978f9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -243,6 +243,7 @@ enum {
 	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
 	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
 	TCP_NLA_CA_STATE,	/* ca_state of socket */
+	TCP_NLA_SND_SSTHRESH,	/* Slow start size threshold */
 
 };
 
-- 
cgit v1.2.3


From 928df1880e24bcd47d6359ff86df24db3dfba3c3 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Mar 2018 16:48:51 +0100
Subject: tipc: obsolete TIPC_ZONE_SCOPE

Publications for TIPC_CLUSTER_SCOPE and TIPC_ZONE_SCOPE are in all
aspects handled the same way, both on the publishing node and on the
receiving nodes.

Despite previous ambitions to the contrary, this is never going to change,
so we take the conseqeunce of this and obsolete TIPC_ZONE_SCOPE and related
macros/functions. Whenever a user is doing a bind() or a sendmsg() attempt
using ZONE_SCOPE we translate this internally to CLUSTER_SCOPE, while we
remain compatible with users and remote nodes still using ZONE_SCOPE.

Furthermore, the non-formalized scope value 0 has always been permitted
for use during lookup, with the same meaning as ZONE_SCOPE/CLUSTER_SCOPE.
We now permit it even as binding scope, but for compatibility reasons we
choose to not change the value of TIPC_CLUSTER_SCOPE.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 102 ++++++++++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 48 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 14bacc7e6cef..4ac9f1f02b06 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -61,50 +61,6 @@ struct tipc_name_seq {
 	__u32 upper;
 };
 
-/* TIPC Address Size, Offset, Mask specification for Z.C.N
- */
-#define TIPC_NODE_BITS          12
-#define TIPC_CLUSTER_BITS       12
-#define TIPC_ZONE_BITS          8
-
-#define TIPC_NODE_OFFSET        0
-#define TIPC_CLUSTER_OFFSET     TIPC_NODE_BITS
-#define TIPC_ZONE_OFFSET        (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
-
-#define TIPC_NODE_SIZE          ((1UL << TIPC_NODE_BITS) - 1)
-#define TIPC_CLUSTER_SIZE       ((1UL << TIPC_CLUSTER_BITS) - 1)
-#define TIPC_ZONE_SIZE          ((1UL << TIPC_ZONE_BITS) - 1)
-
-#define TIPC_NODE_MASK		(TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
-#define TIPC_CLUSTER_MASK	(TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
-#define TIPC_ZONE_MASK		(TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
-
-#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
-
-static inline __u32 tipc_addr(unsigned int zone,
-			      unsigned int cluster,
-			      unsigned int node)
-{
-	return (zone << TIPC_ZONE_OFFSET) |
-		(cluster << TIPC_CLUSTER_OFFSET) |
-		node;
-}
-
-static inline unsigned int tipc_zone(__u32 addr)
-{
-	return addr >> TIPC_ZONE_OFFSET;
-}
-
-static inline unsigned int tipc_cluster(__u32 addr)
-{
-	return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
-}
-
-static inline unsigned int tipc_node(__u32 addr)
-{
-	return addr & TIPC_NODE_MASK;
-}
-
 /*
  * Application-accessible port name types
  */
@@ -117,9 +73,10 @@ static inline unsigned int tipc_node(__u32 addr)
 /*
  * Publication scopes when binding port names and port name sequences
  */
-#define TIPC_ZONE_SCOPE         1
-#define TIPC_CLUSTER_SCOPE      2
-#define TIPC_NODE_SCOPE         3
+enum tipc_scope {
+	TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
+	TIPC_NODE_SCOPE    = 3
+};
 
 /*
  * Limiting values for messages
@@ -243,7 +200,7 @@ struct sockaddr_tipc {
 struct tipc_group_req {
 	__u32 type;      /* group id */
 	__u32 instance;  /* member id */
-	__u32 scope;     /* zone/cluster/node */
+	__u32 scope;     /* cluster/node */
 	__u32 flags;
 };
 
@@ -268,4 +225,53 @@ struct tipc_sioc_ln_req {
 	__u32 bearer_id;
 	char linkname[TIPC_MAX_LINK_NAME];
 };
+
+
+/* The macros and functions below are deprecated:
+ */
+
+#define TIPC_ZONE_SCOPE         1
+
+#define TIPC_NODE_BITS          12
+#define TIPC_CLUSTER_BITS       12
+#define TIPC_ZONE_BITS          8
+
+#define TIPC_NODE_OFFSET        0
+#define TIPC_CLUSTER_OFFSET     TIPC_NODE_BITS
+#define TIPC_ZONE_OFFSET        (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
+
+#define TIPC_NODE_SIZE          ((1UL << TIPC_NODE_BITS) - 1)
+#define TIPC_CLUSTER_SIZE       ((1UL << TIPC_CLUSTER_BITS) - 1)
+#define TIPC_ZONE_SIZE          ((1UL << TIPC_ZONE_BITS) - 1)
+
+#define TIPC_NODE_MASK		(TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
+#define TIPC_CLUSTER_MASK	(TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
+#define TIPC_ZONE_MASK		(TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
+
+#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
+
+static inline __u32 tipc_addr(unsigned int zone,
+			      unsigned int cluster,
+			      unsigned int node)
+{
+	return (zone << TIPC_ZONE_OFFSET) |
+		(cluster << TIPC_CLUSTER_OFFSET) |
+		node;
+}
+
+static inline unsigned int tipc_zone(__u32 addr)
+{
+	return addr >> TIPC_ZONE_OFFSET;
+}
+
+static inline unsigned int tipc_cluster(__u32 addr)
+{
+	return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
+}
+
+static inline unsigned int tipc_node(__u32 addr)
+{
+	return addr & TIPC_NODE_MASK;
+}
+
 #endif
-- 
cgit v1.2.3


From 4f738adba30a7cfc006f605707e7aee847ffefa0 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:10 -0700
Subject: bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data

This implements a BPF ULP layer to allow policy enforcement and
monitoring at the socket layer. In order to support this a new
program type BPF_PROG_TYPE_SK_MSG is used to run the policy at
the sendmsg/sendpage hook. To attach the policy to sockets a
sockmap is used with a new program attach type BPF_SK_MSG_VERDICT.

Similar to previous sockmap usages when a sock is added to a
sockmap, via a map update, if the map contains a BPF_SK_MSG_VERDICT
program type attached then the BPF ULP layer is created on the
socket and the attached BPF_PROG_TYPE_SK_MSG program is run for
every msg in sendmsg case and page/offset in sendpage case.

BPF_PROG_TYPE_SK_MSG Semantics/API:

BPF_PROG_TYPE_SK_MSG supports only two return codes SK_PASS and
SK_DROP. Returning SK_DROP free's the copied data in the sendmsg
case and in the sendpage case leaves the data untouched. Both cases
return -EACESS to the user. Returning SK_PASS will allow the msg to
be sent.

In the sendmsg case data is copied into kernel space buffers before
running the BPF program. The kernel space buffers are stored in a
scatterlist object where each element is a kernel memory buffer.
Some effort is made to coalesce data from the sendmsg call here.
For example a sendmsg call with many one byte iov entries will
likely be pushed into a single entry. The BPF program is run with
data pointers (start/end) pointing to the first sg element.

In the sendpage case data is not copied. We opt not to copy the
data by default here, because the BPF infrastructure does not
know what bytes will be needed nor when they will be needed. So
copying all bytes may be wasteful. Because of this the initial
start/end data pointers are (0,0). Meaning no data can be read or
written. This avoids reading data that may be modified by the
user. A new helper is added later in this series if reading and
writing the data is needed. The helper call will do a copy by
default so that the page is exclusively owned by the BPF call.

The verdict from the BPF_PROG_TYPE_SK_MSG applies to the entire msg
in the sendmsg() case and the entire page/offset in the sendpage case.
This avoids ambiguity on how to handle mixed return codes in the
sendmsg case. Again a helper is added later in the series if
a verdict needs to apply to multiple system calls and/or only
a subpart of the currently being processed message.

The helper msg_redirect_map() can be used to select the socket to
send the data on. This is used similar to existing redirect use
cases. This allows policy to redirect msgs.

Pseudo code simple example:

The basic logic to attach a program to a socket is as follows,

  // load the programs
  bpf_prog_load(SOCKMAP_TCP_MSG_PROG, BPF_PROG_TYPE_SK_MSG,
		&obj, &msg_prog);

  // lookup the sockmap
  bpf_map_msg = bpf_object__find_map_by_name(obj, "my_sock_map");

  // get fd for sockmap
  map_fd_msg = bpf_map__fd(bpf_map_msg);

  // attach program to sockmap
  bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);

Adding sockets to the map is done in the normal way,

  // Add a socket 'fd' to sockmap at location 'i'
  bpf_map_update_elem(map_fd_msg, &i, fd, BPF_ANY);

After the above any socket attached to "my_sock_map", in this case
'fd', will run the BPF msg verdict program (msg_prog) on every
sendmsg and sendpage system call.

For a complete example see BPF selftests or sockmap samples.

Implementation notes:

It seemed the simplest, to me at least, to use a refcnt to ensure
psock is not lost across the sendmsg copy into the sg, the bpf program
running on the data in sg_data, and the final pass to the TCP stack.
Some performance testing may show a better method to do this and avoid
the refcnt cost, but for now use the simpler method.

Another item that will come after basic support is in place is
supporting MSG_MORE flag. At the moment we call sendpages even if
the MSG_MORE flag is set. An enhancement would be to collect the
pages into a larger scatterlist and pass down the stack. Notice that
bpf_tcp_sendmsg() could support this with some additional state saved
across sendmsg calls. I built the code to support this without having
to do refactoring work. Other features TBD include ZEROCOPY and the
TCP_RECV_QUEUE/TCP_NO_QUEUE support. This will follow initial series
shortly.

Future work could improve size limits on the scatterlist rings used
here. Currently, we use MAX_SKB_FRAGS simply because this was being
used already in the TLS case. Future work could extend the kernel sk
APIs to tune this depending on workload. This is a trade-off
between memory usage and throughput performance.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1e15d1724d89..ef529539abde 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
+	BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
+	BPF_SK_MSG_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -718,6 +720,15 @@ union bpf_attr {
  * int bpf_override_return(pt_regs, rc)
  *	@pt_regs: pointer to struct pt_regs
  *	@rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -779,7 +790,8 @@ union bpf_attr {
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
 	FN(override_return),		\
-	FN(sock_ops_cb_flags_set),
+	FN(sock_ops_cb_flags_set),	\
+	FN(msg_redirect_map),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -942,6 +954,14 @@ enum sk_action {
 	SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+	void *data;
+	void *data_end;
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
-- 
cgit v1.2.3


From 2a100317c9ebc204a166f16294884fbf9da074ce Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:15 -0700
Subject: bpf: sockmap, add bpf_msg_apply_bytes() helper

A single sendmsg or sendfile system call can contain multiple logical
messages that a BPF program may want to read and apply a verdict. But,
without an apply_bytes helper any verdict on the data applies to all
bytes in the sendmsg/sendfile. Alternatively, a BPF program may only
care to read the first N bytes of a msg. If the payload is large say
MB or even GB setting up and calling the BPF program repeatedly for
all bytes, even though the verdict is already known, creates
unnecessary overhead.

To allow BPF programs to control how many bytes a given verdict
applies to we implement a bpf_msg_apply_bytes() helper. When called
from within a BPF program this sets a counter, internal to the
BPF infrastructure, that applies the last verdict to the next N
bytes. If the N is smaller than the current data being processed
from a sendmsg/sendfile call, the first N bytes will be sent and
the BPF program will be re-run with start_data pointing to the N+1
byte. If N is larger than the current data being processed the
BPF verdict will be applied to multiple sendmsg/sendfile calls
until N bytes are consumed.

Note1 if a socket closes with apply_bytes counter non-zero this
is not a problem because data is not being buffered for N bytes
and is sent as its received.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ef529539abde..a557a2a5d72d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -791,7 +791,8 @@ union bpf_attr {
 	FN(getsockopt),			\
 	FN(override_return),		\
 	FN(sock_ops_cb_flags_set),	\
-	FN(msg_redirect_map),
+	FN(msg_redirect_map),		\
+	FN(msg_apply_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 91843d540a139eb8070bcff8aa10089164436deb Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:20 -0700
Subject: bpf: sockmap, add msg_cork_bytes() helper

In the case where we need a specific number of bytes before a
verdict can be assigned, even if the data spans multiple sendmsg
or sendfile calls. The BPF program may use msg_cork_bytes().

The extreme case is a user can call sendmsg repeatedly with
1-byte msg segments. Obviously, this is bad for performance but
is still valid. If the BPF program needs N bytes to validate
a header it can use msg_cork_bytes to specify N bytes and the
BPF program will not be called again until N bytes have been
accumulated. The infrastructure will attempt to coalesce data
if possible so in many cases (most my use cases at least) the
data will be in a single scatterlist element with data pointers
pointing to start/end of the element. However, this is dependent
on available memory so is not guaranteed. So BPF programs must
validate data pointer ranges, but this is the case anyways to
convince the verifier the accesses are valid.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a557a2a5d72d..1765cfb16c99 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -792,7 +792,8 @@ union bpf_attr {
 	FN(override_return),		\
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
-	FN(msg_apply_bytes),
+	FN(msg_apply_bytes),		\
+	FN(msg_cork_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 015632bb30daaaee64e1bcac07570860e0bf3092 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:25 -0700
Subject: bpf: sk_msg program helper bpf_sk_msg_pull_data

Currently, if a bpf sk msg program is run the program
can only parse data that the (start,end) pointers already
consumed. For sendmsg hooks this is likely the first
scatterlist element. For sendpage this will be the range
(0,0) because the data is shared with userspace and by
default we want to avoid allowing userspace to modify
data while (or after) BPF verdict is being decided.

To support pulling in additional bytes for parsing use
a new helper bpf_sk_msg_pull(start, end, flags) which
works similar to cls tc logic. This helper will attempt
to point the data start pointer at 'start' bytes offest
into msg and data end pointer at 'end' bytes offset into
message.

After basic sanity checks to ensure 'start' <= 'end' and
'end' <= msg_length there are a few cases we need to
handle.

First the sendmsg hook has already copied the data from
userspace and has exclusive access to it. Therefor, it
is not necessesary to copy the data. However, it may
be required. After finding the scatterlist element with
'start' offset byte in it there are two cases. One the
range (start,end) is entirely contained in the sg element
and is already linear. All that is needed is to update the
data pointers, no allocate/copy is needed. The other case
is (start, end) crosses sg element boundaries. In this
case we allocate a block of size 'end - start' and copy
the data to linearize it.

Next sendpage hook has not copied any data in initial
state so that data pointers are (0,0). In this case we
handle it similar to the above sendmsg case except the
allocation/copy must always happen. Then when sending
the data we have possibly three memory regions that
need to be sent, (0, start - 1), (start, end), and
(end + 1, msg_length). This is required to ensure any
writes by the BPF program are correctly transmitted.

Lastly this operation will invalidate any previous
data checks so BPF programs will have to revalidate
pointers after making this BPF call.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1765cfb16c99..18b7c510c511 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -793,7 +793,8 @@ union bpf_attr {
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
-	FN(msg_cork_bytes),
+	FN(msg_cork_bytes),		\
+	FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From d719e3f21cf91d3f82bd827d46199ba41af2f73a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 9 Mar 2018 11:57:20 +0100
Subject: netfilter: nft_ct: add NFT_CT_{SRC,DST}_{IP,IP6}

All existing keys, except the NFT_CT_SRC and NFT_CT_DST are assumed to
have strict datatypes. This is causing problems with sets and
concatenations given the specific length of these keys is not known.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Florian Westphal <fw@strlen.de>
---
 include/uapi/linux/netfilter/nf_tables.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 66dceee0ae30..6a3d653d5b27 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -909,8 +909,8 @@ enum nft_rt_attributes {
  * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
  * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
  * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
- * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
- * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated)
  * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
  * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
  * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
@@ -920,6 +920,10 @@ enum nft_rt_attributes {
  * @NFT_CT_AVGPKT: conntrack average bytes per packet
  * @NFT_CT_ZONE: conntrack zone
  * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack
+ * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address)
+ * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address)
+ * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address)
+ * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address)
  */
 enum nft_ct_keys {
 	NFT_CT_STATE,
@@ -941,6 +945,10 @@ enum nft_ct_keys {
 	NFT_CT_AVGPKT,
 	NFT_CT_ZONE,
 	NFT_CT_EVENTMASK,
+	NFT_CT_SRC_IP,
+	NFT_CT_DST_IP,
+	NFT_CT_SRC_IP6,
+	NFT_CT_DST_IP6,
 };
 
 /**
-- 
cgit v1.2.3


From 472a73e00757b971d613d796374d2727b2e4954d Mon Sep 17 00:00:00 2001
From: Jack Ma <jack.ma@alliedtelesis.co.nz>
Date: Mon, 19 Mar 2018 09:41:59 +1300
Subject: netfilter: xt_conntrack: Support bit-shifting for CONNMARK & MARK
 targets.

This patch introduces a new feature that allows bitshifting (left
and right) operations to co-operate with existing iptables options.

Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jack Ma <jack.ma@alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_connmark.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h
index 408a9654f05c..1aa5c955ee1e 100644
--- a/include/uapi/linux/netfilter/xt_connmark.h
+++ b/include/uapi/linux/netfilter/xt_connmark.h
@@ -19,11 +19,21 @@ enum {
 	XT_CONNMARK_RESTORE
 };
 
+enum {
+	D_SHIFT_LEFT = 0,
+	D_SHIFT_RIGHT,
+};
+
 struct xt_connmark_tginfo1 {
 	__u32 ctmark, ctmask, nfmask;
 	__u8 mode;
 };
 
+struct xt_connmark_tginfo2 {
+	__u32 ctmark, ctmask, nfmask;
+	__u8 shift_dir, shift_bits, mode;
+};
+
 struct xt_connmark_mtinfo1 {
 	__u32 mark, mask;
 	__u8 invert;
-- 
cgit v1.2.3


From 20710b3b81895c89e92bcc32ce85c0bede1171f8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 20 Mar 2018 12:33:51 +0100
Subject: netfilter: ctnetlink: synproxy support

This patch exposes synproxy information per-conntrack. Moreover, send
sequence adjustment events once server sends us the SYN,ACK packet, so
we can synchronize the sequence adjustment too for packets going as
reply from the server, as part of the synproxy logic.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_conntrack_common.h |  1 +
 include/uapi/linux/netfilter/nfnetlink_conntrack.h | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 9574bd40870b..c712eb6879f1 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -129,6 +129,7 @@ enum ip_conntrack_events {
 	IPCT_NATSEQADJ = IPCT_SEQADJ,
 	IPCT_SECMARK,		/* new security mark has been set */
 	IPCT_LABEL,		/* new connlabel has been set */
+	IPCT_SYNPROXY,		/* synproxy has been set */
 #ifdef __KERNEL__
 	__IPCT_MAX
 #endif
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 7397e022ce6e..77987111cab0 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -54,6 +54,7 @@ enum ctattr_type {
 	CTA_MARK_MASK,
 	CTA_LABELS,
 	CTA_LABELS_MASK,
+	CTA_SYNPROXY,
 	__CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -190,6 +191,15 @@ enum ctattr_natseq {
 };
 #define CTA_NAT_SEQ_MAX (__CTA_NAT_SEQ_MAX - 1)
 
+enum ctattr_synproxy {
+	CTA_SYNPROXY_UNSPEC,
+	CTA_SYNPROXY_ISN,
+	CTA_SYNPROXY_ITS,
+	CTA_SYNPROXY_TSOFF,
+	__CTA_SYNPROXY_MAX,
+};
+#define CTA_SYNPROXY_MAX (__CTA_SYNPROXY_MAX - 1)
+
 enum ctattr_expect {
 	CTA_EXPECT_UNSPEC,
 	CTA_EXPECT_MASTER,
-- 
cgit v1.2.3


From 5adc1668ddc42bb44fd6d006cacad74ed0cbf49d Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 4 Mar 2018 09:28:53 +0100
Subject: netfilter: ebtables: add support for matching ICMP type and code

We already have ICMPv6 type/code matches. This adds support for IPv4 ICMP
matches in the same way.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebt_ip.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h
index 8e462fb1983f..4ed7fbb0a482 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_ip.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h
@@ -24,8 +24,9 @@
 #define EBT_IP_PROTO 0x08
 #define EBT_IP_SPORT 0x10
 #define EBT_IP_DPORT 0x20
+#define EBT_IP_ICMP 0x40
 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\
- EBT_IP_SPORT | EBT_IP_DPORT )
+		     EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP)
 #define EBT_IP_MATCH "ip"
 
 /* the same values are used for the invflags */
@@ -38,8 +39,14 @@ struct ebt_ip_info {
 	__u8  protocol;
 	__u8  bitmask;
 	__u8  invflags;
-	__u16 sport[2];
-	__u16 dport[2];
+	union {
+		__u16 sport[2];
+		__u8 icmp_type[2];
+	};
+	union {
+		__u16 dport[2];
+		__u8 icmp_code[2];
+	};
 };
 
 #endif
-- 
cgit v1.2.3


From 78d9f4d49bbecd101b4e5faf19f8f70719fee2ca Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 4 Mar 2018 09:28:54 +0100
Subject: netfilter: ebtables: add support for matching IGMP type

We already have ICMPv6 type/code matches (which can be used to distinguish
different types of MLD packets). Add support for IPv4 IGMP matches in the
same way.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebt_ip.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h
index 4ed7fbb0a482..46d6261370b0 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_ip.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h
@@ -25,8 +25,9 @@
 #define EBT_IP_SPORT 0x10
 #define EBT_IP_DPORT 0x20
 #define EBT_IP_ICMP 0x40
+#define EBT_IP_IGMP 0x80
 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\
-		     EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP)
+		     EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP | EBT_IP_IGMP)
 #define EBT_IP_MATCH "ip"
 
 /* the same values are used for the invflags */
@@ -42,6 +43,7 @@ struct ebt_ip_info {
 	union {
 		__u16 sport[2];
 		__u8 icmp_type[2];
+		__u8 igmp_type[2];
 	};
 	union {
 		__u16 dport[2];
-- 
cgit v1.2.3


From 2cb021f5de55b1d158fa18b0215a4613c3289a82 Mon Sep 17 00:00:00 2001
From: Dmitry Lebed <dlebed@quantenna.com>
Date: Thu, 1 Mar 2018 12:39:16 +0300
Subject: cfg80211/nl80211: add CAC_STARTED event

CAC_STARTED event is needed for DFS offload feature and
should be generated by driver/HW if DFS_OFFLOAD is enabled.

Signed-off-by: Dmitry Lebed <dlebed@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c13c84304be3..b8e989073cbb 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5204,6 +5204,8 @@ enum nl80211_smps_mode {
  *	non-operating channel is expired and no longer valid. New CAC must
  *	be done on this channel before starting the operation. This is not
  *	applicable for ETSI dfs domain where pre-CAC is valid for ever.
+ * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started,
+ *	should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled.
  */
 enum nl80211_radar_event {
 	NL80211_RADAR_DETECTED,
@@ -5211,6 +5213,7 @@ enum nl80211_radar_event {
 	NL80211_RADAR_CAC_ABORTED,
 	NL80211_RADAR_NOP_FINISHED,
 	NL80211_RADAR_PRE_CAC_EXPIRED,
+	NL80211_RADAR_CAC_STARTED,
 };
 
 /**
-- 
cgit v1.2.3


From 13cf6dec93e021ebd297619ea1926aea31b6430b Mon Sep 17 00:00:00 2001
From: Dmitry Lebed <dlebed@quantenna.com>
Date: Thu, 1 Mar 2018 12:39:15 +0300
Subject: cfg80211/nl80211: add DFS offload flag

Add wiphy EXT_FEATURE flag to indicate that HW or driver does
all DFS actions by itself.
User-space functionality already implemented in hostapd using
vendor-specific (QCA) OUI to advertise DFS offload support.
Need to introduce generic flag to inform about DFS offload support.
For devices with DFS_OFFLOAD flag set user-space will no longer
need to issue CAC or do any actions in response to
"radar detected" events. HW will do everything by itself and send
events to user-space to indicate that CAC was started/finished, etc.

Signed-off-by: Dmitrii Lebed <dlebed@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b8e989073cbb..60fefc5a2ea4 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4999,6 +4999,12 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
  * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
  * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
+ * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions.
+ *	Device or driver will do all DFS-related actions by itself,
+ *	informing user-space about CAC progress, radar detection event,
+ *	channel change triggered by radar detection event.
+ *	No need to start CAC from user-space, no need to react to
+ *	"radar detected" event.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5029,6 +5035,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
 	NL80211_EXT_FEATURE_LOW_POWER_SCAN,
 	NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
+	NL80211_EXT_FEATURE_DFS_OFFLOAD,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From c30b70deb5f4861f590031c33fd3ec6cc63f1df1 Mon Sep 17 00:00:00 2001
From: GhantaKrishnamurthy MohanKrishna
 <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Date: Wed, 21 Mar 2018 14:37:44 +0100
Subject: tipc: implement socket diagnostics for AF_TIPC

This commit adds socket diagnostics capability for AF_TIPC in netlink
family NETLINK_SOCK_DIAG in a new kernel module (diag.ko).

The following are key design considerations:
- config TIPC_DIAG has default y, like INET_DIAG.
- only requests with flag NLM_F_DUMP is supported (dump all).
- tipc_sock_diag_req message is introduced to send filter parameters.
- the response attributes are of TLV, some nested.

To avoid exposing data structures between diag and tipc modules and
avoid code duplication, the following additions are required:
- export tipc_nl_sk_walk function to reuse socket iterator.
- export tipc_sk_fill_sock_diag to fill the tipc diag attributes.
- create a sock_diag response message in __tipc_add_sock_diag defined
  in diag.c and use the above exported tipc_sk_fill_sock_diag
  to fill response.

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: GhantaKrishnamurthy MohanKrishna <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h      | 18 ++++++++++++++++++
 include/uapi/linux/tipc_sockets_diag.h | 17 +++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 include/uapi/linux/tipc_sockets_diag.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 469aa67a5ecb..d7cec0480d70 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -114,6 +114,13 @@ enum {
 	TIPC_NLA_SOCK_REF,		/* u32 */
 	TIPC_NLA_SOCK_CON,		/* nest */
 	TIPC_NLA_SOCK_HAS_PUBL,		/* flag */
+	TIPC_NLA_SOCK_STAT,		/* nest */
+	TIPC_NLA_SOCK_TYPE,		/* u32 */
+	TIPC_NLA_SOCK_INO,		/* u32 */
+	TIPC_NLA_SOCK_UID,		/* u32 */
+	TIPC_NLA_SOCK_TIPC_STATE,	/* u32 */
+	TIPC_NLA_SOCK_COOKIE,		/* u64 */
+	TIPC_NLA_SOCK_PAD,		/* flag */
 
 	__TIPC_NLA_SOCK_MAX,
 	TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -238,6 +245,17 @@ enum {
 	TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1
 };
 
+/* Nest, socket statistics info */
+enum {
+	TIPC_NLA_SOCK_STAT_RCVQ,	/* u32 */
+	TIPC_NLA_SOCK_STAT_SENDQ,	/* u32 */
+	TIPC_NLA_SOCK_STAT_LINK_CONG,	/* flag */
+	TIPC_NLA_SOCK_STAT_CONN_CONG,	/* flag */
+
+	__TIPC_NLA_SOCK_STAT_MAX,
+	TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
+};
+
 /* Nest, link propreties. Valid for link, media and bearer */
 enum {
 	TIPC_NLA_PROP_UNSPEC,
diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h
new file mode 100644
index 000000000000..7678cf2f0dcc
--- /dev/null
+++ b/include/uapi/linux/tipc_sockets_diag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* AF_TIPC sock_diag interface for querying open sockets */
+
+#ifndef _UAPI__TIPC_SOCKETS_DIAG_H__
+#define _UAPI__TIPC_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+#include <linux/sock_diag.h>
+
+/* Request */
+struct tipc_sock_diag_req {
+	__u8	sdiag_family;	/* must be AF_TIPC */
+	__u8	sdiag_protocol;	/* must be 0 */
+	__u16	pad;		/* must be 0 */
+	__u32	tidiag_states;	/* query*/
+};
+#endif /* _UAPI__TIPC_SOCKETS_DIAG_H__ */
-- 
cgit v1.2.3


From 872619d8cf810c17279335ef531a2a34f3b4e589 Mon Sep 17 00:00:00 2001
From: GhantaKrishnamurthy MohanKrishna
 <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Date: Wed, 21 Mar 2018 14:37:45 +0100
Subject: tipc: step sk->sk_drops when rcv buffer is full

Currently when tipc is unable to queue a received message on a
socket, the message is rejected back to the sender with error
TIPC_ERR_OVERLOAD. However, the application on this socket
has no knowledge about these discards.

In this commit, we try to step the sk_drops counter when tipc
is unable to queue a received message. Export sk_drops
using tipc socket diagnostics.

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: GhantaKrishnamurthy MohanKrishna <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index d7cec0480d70..d896ded51bcb 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -251,6 +251,7 @@ enum {
 	TIPC_NLA_SOCK_STAT_SENDQ,	/* u32 */
 	TIPC_NLA_SOCK_STAT_LINK_CONG,	/* flag */
 	TIPC_NLA_SOCK_STAT_CONN_CONG,	/* flag */
+	TIPC_NLA_SOCK_STAT_DROP,	/* u32 */
 
 	__TIPC_NLA_SOCK_STAT_MAX,
 	TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
-- 
cgit v1.2.3


From 14452ca3b5ce304fb2fea96dbc9ca1e4e7978551 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 21 Mar 2018 19:48:14 -0600
Subject: net: qualcomm: rmnet: Export mux_id and flags to netlink

Define new netlink attributes for rmnet mux_id and flags. These
flags / mux_id were earlier using vlan flags / id respectively.
The flag bits are also moved to uapi and are renamed with
prefix RMNET_FLAG_*.

Also add the rmnet policy to handle the new netlink attributes.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 11d0c0ea2bfa..68699f654118 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -959,4 +959,25 @@ enum {
 
 #define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
 
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION         (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+
+enum {
+	IFLA_RMNET_UNSPEC,
+	IFLA_RMNET_MUX_ID,
+	IFLA_RMNET_FLAGS,
+	__IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX	(__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+	__u32	flags;
+	__u32	mask;
+};
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From c46234ebb4d1eee5e09819f49169e51cfc6eb909 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:10:35 -0700
Subject: tls: RX path for ktls

Add rx path for tls software implementation.

recvmsg, splice_read, and poll implemented.

An additional sockopt TLS_RX is added, with the same interface as
TLS_TX.  Either TLX_RX or TLX_TX may be provided separately, or
together (with two different setsockopt calls with appropriate keys).

Control messages are passed via CMSG in a similar way to transmit.
If no cmsg buffer is passed, then only application data records
will be passed to userspace, and EIO is returned for other types of
alerts.

EBADMSG is passed for decryption errors, and EMSGSIZE is passed for
framing too big, and EBADMSG for framing too small (matching openssl
semantics). EINVAL is returned for TLS versions that do not match the
original setsockopt call.  All are unrecoverable.

strparser is used to parse TLS framing.   Decryption is done directly
in to userspace buffers if they are large enough to support it, otherwise
sk_cow_data is called (similar to ipsec), and buffers are decrypted in
place and copied.  splice_read always decrypts in place, since no
buffers are provided to decrypt in to.

sk_poll is overridden, and only returns POLLIN if a full TLS message is
received.  Otherwise we wait for strparser to finish reading a full frame.
Actual decryption is only done during recvmsg or splice_read calls.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 293b2cdad88d..c6633e97eca4 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -38,6 +38,7 @@
 
 /* TLS socket options */
 #define TLS_TX			1	/* Set transmit parameters */
+#define TLS_RX			2	/* Set receive parameters */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
@@ -59,6 +60,7 @@
 #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE		8
 
 #define TLS_SET_RECORD_TYPE	1
+#define TLS_GET_RECORD_TYPE	2
 
 struct tls_crypto_info {
 	__u16 version;
-- 
cgit v1.2.3


From d50ccc2d3909fc1b4d40e4af16b026f05dc68707 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:50 +0100
Subject: tipc: add 128-bit node identifier

We add a 128-bit node identity, as an alternative to the currently used
32-bit node address.

For the sake of compatibility and to minimize message header changes
we retain the existing 32-bit address field. When not set explicitly by
the user, this field will be filled with a hash value generated from the
much longer node identity, and be used as a shorthand value for the
latter.

We permit either the address or the identity to be set by configuration,
but not both, so when the address value is set by a legacy user the
corresponding 128-bit node identity is generated based on the that value.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index d896ded51bcb..0affb682e5e3 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -169,6 +169,8 @@ enum {
 	TIPC_NLA_NET_UNSPEC,
 	TIPC_NLA_NET_ID,		/* u32 */
 	TIPC_NLA_NET_ADDR,		/* u32 */
+	TIPC_NLA_NET_NODEID,		/* u64 */
+	TIPC_NLA_NET_NODEID_W1,		/* u64 */
 
 	__TIPC_NLA_NET_MAX,
 	TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1
-- 
cgit v1.2.3


From e1577c1c881b09e9f15a743a4a1907815b74d0f7 Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Mon, 20 Nov 2017 16:14:30 +0200
Subject: ethtool: Add support for configuring PFC stall prevention in ethtool

In the event where the device unexpectedly becomes unresponsive
for a long period of time, flow control mechanism may propagate
pause frames which will cause congestion spreading to the entire
network.
To prevent this scenario, when the device is stalled for a period
longer than a pre-configured timeout, flow control mechanisms are
automatically disabled.

This patch adds support for the ETHTOOL_PFC_STALL_PREVENTION
as a tunable.
This API provides support for configuring flow control storm prevention
timeout (msec).

Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Cc: Michal Kubecek <mkubecek@suse.cz>
Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/uapi/linux/ethtool.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 20da156aaf64..4ca65b56084f 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -217,10 +217,14 @@ struct ethtool_value {
 	__u32	data;
 };
 
+#define PFC_STORM_PREVENTION_AUTO	0xffff
+#define PFC_STORM_PREVENTION_DISABLE	0
+
 enum tunable_id {
 	ETHTOOL_ID_UNSPEC,
 	ETHTOOL_RX_COPYBREAK,
 	ETHTOOL_TX_COPYBREAK,
+	ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
 	/*
 	 * Add your fresh new tubale attribute above and remember to update
 	 * tunable_strings[] in net/core/ethtool.c
-- 
cgit v1.2.3


From c4f6699dfcb8558d138fe838f741b2c10f416cf9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:37 -0700
Subject: bpf: introduce BPF_RAW_TRACEPOINT

Introduce BPF_PROG_TYPE_RAW_TRACEPOINT bpf program type to access
kernel internal arguments of the tracepoints in their raw form.

>From bpf program point of view the access to the arguments look like:
struct bpf_raw_tracepoint_args {
       __u64 args[0];
};

int bpf_prog(struct bpf_raw_tracepoint_args *ctx)
{
  // program can read args[N] where N depends on tracepoint
  // and statically verified at program load+attach time
}

kprobe+bpf infrastructure allows programs access function arguments.
This feature allows programs access raw tracepoint arguments.

Similar to proposed 'dynamic ftrace events' there are no abi guarantees
to what the tracepoints arguments are and what their meaning is.
The program needs to type cast args properly and use bpf_probe_read()
helper to access struct fields when argument is a pointer.

For every tracepoint __bpf_trace_##call function is prepared.
In assembler it looks like:
(gdb) disassemble __bpf_trace_xdp_exception
Dump of assembler code for function __bpf_trace_xdp_exception:
   0xffffffff81132080 <+0>:     mov    %ecx,%ecx
   0xffffffff81132082 <+2>:     jmpq   0xffffffff811231f0 <bpf_trace_run3>

where

TRACE_EVENT(xdp_exception,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp, u32 act),

The above assembler snippet is casting 32-bit 'act' field into 'u64'
to pass into bpf_trace_run3(), while 'dev' and 'xdp' args are passed as-is.
All of ~500 of __bpf_trace_*() functions are only 5-10 byte long
and in total this approach adds 7k bytes to .text.

This approach gives the lowest possible overhead
while calling trace_xdp_exception() from kernel C code and
transitioning into bpf land.
Since tracepoint+bpf are used at speeds of 1M+ events per second
this is valuable optimization.

The new BPF_RAW_TRACEPOINT_OPEN sys_bpf command is introduced
that returns anon_inode FD of 'bpf-raw-tracepoint' object.

The user space looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint with prog attached
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);

Ctrl-C of tracing daemon or cmdline tool that uses this feature
will automatically detach bpf program, unload it and
unregister tracepoint probe.

On the kernel side the __bpf_raw_tp_map section of pointers to
tracepoint definition and to __bpf_trace_*() probe function is used
to find a tracepoint with "xdp_exception" name and
corresponding __bpf_trace_xdp_exception() probe function
which are passed to tracepoint_probe_register() to connect probe
with tracepoint.

Addition of bpf_raw_tracepoint doesn't interfere with ftrace and perf
tracepoint mechanisms. perf_event_open() can be used in parallel
on the same tracepoint.
Multiple bpf_raw_tracepoint_open("xdp_exception", prog_fd) are permitted.
Each with its own bpf program. The kernel will execute
all tracepoint probes and all attached bpf programs.

In the future bpf_raw_tracepoints can be extended with
query/introspection logic.

__bpf_raw_tp_map section logic was contributed by Steven Rostedt

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..1878201c2d77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@ enum bpf_cmd {
 	BPF_MAP_GET_FD_BY_ID,
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
+	BPF_RAW_TRACEPOINT_OPEN,
 };
 
 enum bpf_map_type {
@@ -134,6 +135,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_RAW_TRACEPOINT,
 };
 
 enum bpf_attach_type {
@@ -344,6 +346,11 @@ union bpf_attr {
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
 	} query;
+
+	struct {
+		__u64 name;
+		__u32 prog_fd;
+	} raw_tracepoint;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx {
 	__u32 minor;
 };
 
+struct bpf_raw_tracepoint_args {
+	__u64 args[0];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From f8d16d3edb4dbae080df04318423c360de3c594d Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:45 -0500
Subject: nl80211: Add SOCKET_OWNER support to JOIN_IBSS

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[johannes: fix race with wdev lock/unlock by just acquiring once]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 60fefc5a2ea4..266b43c4f6e1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1962,6 +1962,8 @@ enum nl80211_commands {
  *	multicast group.
  *	If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the
  *	station will deauthenticate when the socket is closed.
+ *	If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically
+ *	torn down when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
-- 
cgit v1.2.3


From 188c1b3c04d69e842122daf201f07a34fcfad039 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:46 -0500
Subject: nl80211: Add SOCKET_OWNER support to JOIN_MESH

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[johannes: fix race with wdev lock/unlock by just acquiring once]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 266b43c4f6e1..98eb37def37d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1964,6 +1964,8 @@ enum nl80211_commands {
  *	station will deauthenticate when the socket is closed.
  *	If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically
  *	torn down when the socket is closed.
+ *	If set during %NL80211_CMD_JOIN_MESH the mesh setup will be
+ *	automatically torn down when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
-- 
cgit v1.2.3


From 466a306142c002b40deaa58da94741af4153d1c4 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:47 -0500
Subject: nl80211: Add SOCKET_OWNER support to START_AP

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 98eb37def37d..9ea3d6039eca 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1966,6 +1966,8 @@ enum nl80211_commands {
  *	torn down when the socket is closed.
  *	If set during %NL80211_CMD_JOIN_MESH the mesh setup will be
  *	automatically torn down when the socket is closed.
+ *	If set during %NL80211_CMD_START_AP the AP will be automatically
+ *	disabled when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
-- 
cgit v1.2.3


From 6a671a50f8199b3e1fe49fa8afff0fc8335da79c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:41 -0500
Subject: nl80211: Add CMD_CONTROL_PORT_FRAME API

This commit also adds cfg80211_rx_control_port function.  This is used
to generate a CMD_CONTROL_PORT_FRAME event out to userspace.  The
conn_owner_nlportid is used as the unicast destination.  This means that
userspace must specify NL80211_ATTR_SOCKET_OWNER flag if control port
over nl80211 routing is requested in NL80211_CMD_CONNECT,
NL80211_CMD_ASSOCIATE, NL80211_CMD_START_AP or IBSS/mesh join.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[johannes: fix return value of cfg80211_rx_control_port()]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9ea3d6039eca..6a3cc7a635b5 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -990,6 +990,17 @@
  *	&NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed
  *	&NL80211_CMD_DISCONNECT should be indicated instead.
  *
+ * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
+ *	and RX notification.  This command is used both as a request to transmit
+ *	a control port frame and as a notification that a control port frame
+ *	has been received. %NL80211_ATTR_FRAME is used to specify the
+ *	frame contents.  The frame is the raw EAPoL data, without ethernet or
+ *	802.11 headers.
+ *	When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added
+ *	indicating the protocol type of the received frame; whether the frame
+ *	was received unencrypted and the MAC address of the peer respectively.
+ *
  * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
  *
  * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
@@ -1228,6 +1239,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_STA_OPMODE_CHANGED,
 
+	NL80211_CMD_CONTROL_PORT_FRAME,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
-- 
cgit v1.2.3


From 2576a9ace47eba28a682d249d1d6402f891808c9 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:42 -0500
Subject: nl80211: Implement TX of control port frames

This commit implements the TX side of NL80211_CMD_CONTROL_PORT_FRAME.
Userspace provides the raw EAPoL frame using NL80211_ATTR_FRAME.
Userspace should also provide the destination address and the protocol
type to use when sending the frame.  This is used to implement TX of
Pre-authentication frames.  If CONTROL_PORT_ETHERTYPE_NO_ENCRYPT is
specified, then the driver will be asked not to encrypt the outgoing
frame.

A new EXT_FEATURE flag is introduced so that nl80211 code can check
whether a given wiphy has capability to pass EAPoL frames over nl80211.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6a3cc7a635b5..3167d6f7fc68 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5024,6 +5024,8 @@ enum nl80211_feature_flags {
  *	channel change triggered by radar detection event.
  *	No need to start CAC from user-space, no need to react to
  *	"radar detected" event.
+ * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and
+ *	receiving control port frames over nl80211 instead of the netdevice.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5055,6 +5057,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_LOW_POWER_SCAN,
 	NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
 	NL80211_EXT_FEATURE_DFS_OFFLOAD,
+	NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From 64bf3d4bc2b0725b3c5ffadd982a9746bfc738b7 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:43 -0500
Subject: nl80211: Add CONTROL_PORT_OVER_NL80211 attribute

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 3167d6f7fc68..15daf5e2638d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -542,7 +542,8 @@
  *	IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP,
  *	%NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT,
  *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
- *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, %NL80211_ATTR_MAC_HINT, and
+ *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
+ *	%NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and
  *	%NL80211_ATTR_WIPHY_FREQ_HINT.
  *	If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are
  *	restrictions on BSS selection, i.e., they effectively prevent roaming
@@ -1488,6 +1489,15 @@ enum nl80211_commands {
  * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with
  *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom
  *	ethertype frames used for key negotiation must not be encrypted.
+ * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control
+ *	port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE)
+ *	will be sent directly to the network interface or sent via the NL80211
+ *	socket.  If this attribute is missing, then legacy behavior of sending
+ *	control port frames directly to the network interface is used.  If the
+ *	flag is included, then control port frames are sent over NL80211 instead
+ *	using %CMD_CONTROL_PORT_FRAME.  If control port routing over NL80211 is
+ *	to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER
+ *	flag.
  *
  * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver.
  *	We recommend using nested, driver-specific attributes within this.
@@ -2647,6 +2657,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_NSS,
 	NL80211_ATTR_ACK_SIGNAL,
 
+	NL80211_ATTR_CONTROL_PORT_OVER_NL80211,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From 39c202d228c3da5a5531be847e9b06cc9b787f31 Mon Sep 17 00:00:00 2001
From: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Date: Wed, 21 Mar 2018 15:42:15 +1300
Subject: netfilter: ebtables: Add support for specifying match revision

Currently ebtables assumes that the revision number of all match
modules is 0, which is an issue when trying to use existing
xtables matches with ebtables. The solution is to modify ebtables
to allow extensions to specify a revision number, similar to
iptables. This gets passed down to the kernel, which is then able
to find the match module correctly.

To main binary backwards compatibility, the size of the ebt_entry
structures is not changed, only the size of the name field is
decreased by 1 byte to make room for the revision field.

Signed-off-by: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebtables.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 9ff57c0a0199..0c7dc8315013 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -20,6 +20,7 @@
 #define EBT_TABLE_MAXNAMELEN 32
 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN
 #define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN
+#define EBT_EXTENSION_MAXNAMELEN 31
 
 /* verdicts >0 are "branches" */
 #define EBT_ACCEPT   -1
@@ -120,7 +121,10 @@ struct ebt_entries {
 
 struct ebt_entry_match {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			uint8_t revision;
+		};
 		struct xt_match *match;
 	} u;
 	/* size of data */
@@ -130,7 +134,10 @@ struct ebt_entry_match {
 
 struct ebt_entry_watcher {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			uint8_t revision;
+		};
 		struct xt_target *watcher;
 	} u;
 	/* size of data */
@@ -140,7 +147,10 @@ struct ebt_entry_watcher {
 
 struct ebt_entry_target {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			uint8_t revision;
+		};
 		struct xt_target *target;
 	} u;
 	/* size of data */
-- 
cgit v1.2.3


From 5e43f899b03a3492ce5fc44e8900becb04dae9c0 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:00 -0700
Subject: bpf: Check attach type at prog load time

== The problem ==

There are use-cases when a program of some type can be attached to
multiple attach points and those attach points must have different
permissions to access context or to call helpers.

E.g. context structure may have fields for both IPv4 and IPv6 but it
doesn't make sense to read from / write to IPv6 field when attach point
is somewhere in IPv4 stack.

Same applies to BPF-helpers: it may make sense to call some helper from
some attach point, but not from other for same prog type.

== The solution ==

Introduce `expected_attach_type` field in in `struct bpf_attr` for
`BPF_PROG_LOAD` command. If scenario described in "The problem" section
is the case for some prog type, the field will be checked twice:

1) At load time prog type is checked to see if attach type for it must
   be known to validate program permissions correctly. Prog will be
   rejected with EINVAL if it's the case and `expected_attach_type` is
   not specified or has invalid value.

2) At attach time `attach_type` is compared with `expected_attach_type`,
   if prog type requires to have one, and, if they differ, attach will
   be rejected with EINVAL.

The `expected_attach_type` is now available as part of `struct bpf_prog`
in both `bpf_verifier_ops->is_valid_access()` and
`bpf_verifier_ops->get_func_proto()` () and can be used to check context
accesses and calls to helpers correspondingly.

Initially the idea was discussed by Alexei Starovoitov <ast@fb.com> and
Daniel Borkmann <daniel@iogearbox.net> here:
https://marc.info/?l=linux-netdev&m=152107378717201&w=2

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1878201c2d77..102718624d1e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -296,6 +296,11 @@ union bpf_attr {
 		__u32		prog_flags;
 		char		prog_name[BPF_OBJ_NAME_LEN];
 		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
+		/* For some prog types expected attach type must be known at
+		 * load time to verify attach type specific parts of prog
+		 * (context accesses, allowed helpers, etc).
+		 */
+		__u32		expected_attach_type;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-- 
cgit v1.2.3


From 4fbac77d2d092b475dda9eea66da674369665427 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:02 -0700
Subject: bpf: Hooks for sys_bind

== The problem ==

There is a use-case when all processes inside a cgroup should use one
single IP address on a host that has multiple IP configured.  Those
processes should use the IP for both ingress and egress, for TCP and UDP
traffic. So TCP/UDP servers should be bound to that IP to accept
incoming connections on it, and TCP/UDP clients should make outgoing
connections from that IP. It should not require changing application
code since it's often not possible.

Currently it's solved by intercepting glibc wrappers around syscalls
such as `bind(2)` and `connect(2)`. It's done by a shared library that
is preloaded for every process in a cgroup so that whenever TCP/UDP
server calls `bind(2)`, the library replaces IP in sockaddr before
passing arguments to syscall. When application calls `connect(2)` the
library transparently binds the local end of connection to that IP
(`bind(2)` with `IP_BIND_ADDRESS_NO_PORT` to avoid performance penalty).

Shared library approach is fragile though, e.g.:
* some applications clear env vars (incl. `LD_PRELOAD`);
* `/etc/ld.so.preload` doesn't help since some applications are linked
  with option `-z nodefaultlib`;
* other applications don't use glibc and there is nothing to intercept.

== The solution ==

The patch provides much more reliable in-kernel solution for the 1st
part of the problem: binding TCP/UDP servers on desired IP. It does not
depend on application environment and implementation details (whether
glibc is used or not).

It adds new eBPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` and
attach types `BPF_CGROUP_INET4_BIND` and `BPF_CGROUP_INET6_BIND`
(similar to already existing `BPF_CGROUP_INET_SOCK_CREATE`).

The new program type is intended to be used with sockets (`struct sock`)
in a cgroup and provided by user `struct sockaddr`. Pointers to both of
them are parts of the context passed to programs of newly added types.

The new attach types provides hooks in `bind(2)` system call for both
IPv4 and IPv6 so that one can write a program to override IP addresses
and ports user program tries to bind to and apply such a program for
whole cgroup.

== Implementation notes ==

[1]
Separate attach types for `AF_INET` and `AF_INET6` are added
intentionally to prevent reading/writing to offsets that don't make
sense for corresponding socket family. E.g. if user passes `sockaddr_in`
it doesn't make sense to read from / write to `user_ip6[]` context
fields.

[2]
The write access to `struct bpf_sock_addr_kern` is implemented using
special field as an additional "register".

There are just two registers in `sock_addr_convert_ctx_access`: `src`
with value to write and `dst` with pointer to context that can't be
changed not to break later instructions. But the fields, allowed to
write to, are not available directly and to access them address of
corresponding pointer has to be loaded first. To get additional register
the 1st not used by `src` and `dst` one is taken, its content is saved
to `bpf_sock_addr_kern.tmp_reg`, then the register is used to load
address of pointer field, and finally the register's content is restored
from the temporary field after writing `src` value.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 102718624d1e..ce3e69e3c793 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
+	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 };
 
 enum bpf_attach_type {
@@ -147,6 +148,8 @@ enum bpf_attach_type {
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
 	BPF_SK_MSG_VERDICT,
+	BPF_CGROUP_INET4_BIND,
+	BPF_CGROUP_INET6_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1010,6 +1013,26 @@ struct bpf_map_info {
 	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+	__u32 user_family;	/* Allows 4-byte read, but no write. */
+	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 user_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 user_port;	/* Allows 4-byte read and write.
+				 * Stored in network byte order
+				 */
+	__u32 family;		/* Allows 4-byte read, but no write */
+	__u32 type;		/* Allows 4-byte read, but no write */
+	__u32 protocol;		/* Allows 4-byte read, but no write */
+};
+
 /* User bpf_sock_ops struct to access socket values and specify request ops
  * and their replies.
  * Some of this fields are in network (bigendian) byte order and may need
-- 
cgit v1.2.3


From d74bad4e74ee373787a9ae24197c17b7cdc428d5 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:05 -0700
Subject: bpf: Hooks for sys_connect

== The problem ==

See description of the problem in the initial patch of this patch set.

== The solution ==

The patch provides much more reliable in-kernel solution for the 2nd
part of the problem: making outgoing connecttion from desired IP.

It adds new attach types `BPF_CGROUP_INET4_CONNECT` and
`BPF_CGROUP_INET6_CONNECT` for program type
`BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both
source and destination of a connection at connect(2) time.

Local end of connection can be bound to desired IP using newly
introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though,
and doesn't support binding to port, i.e. leverages
`IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this:
* looking for a free port is expensive and can affect performance
  significantly;
* there is no use-case for port.

As for remote end (`struct sockaddr *` passed by user), both parts of it
can be overridden, remote IP and remote port. It's useful if an
application inside cgroup wants to connect to another application inside
same cgroup or to itself, but knows nothing about IP assigned to the
cgroup.

Support is added for IPv4 and IPv6, for TCP and UDP.

IPv4 and IPv6 have separate attach types for same reason as sys_bind
hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields
when user passes sockaddr_in since it'd be out-of-bound.

== Implementation notes ==

The patch introduces new field in `struct proto`: `pre_connect` that is
a pointer to a function with same signature as `connect` but is called
before it. The reason is in some cases BPF hooks should be called way
before control is passed to `sk->sk_prot->connect`. Specifically
`inet_dgram_connect` autobinds socket before calling
`sk->sk_prot->connect` and there is no way to call `bpf_bind()` from
hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since
it'd cause double-bind. On the other hand `proto.pre_connect` provides a
flexible way to add BPF hooks for connect only for necessary `proto` and
call them at desired time before `connect`. Since `bpf_bind()` is
allowed to bind only to IP and autobind in `inet_dgram_connect` binds
only port there is no chance of double-bind.

bpf_bind() sets `force_bind_address_no_port` to bind to only IP despite
of value of `bind_address_no_port` socket field.

bpf_bind() sets `with_lock` to `false` when calling to __inet_bind()
and __inet6_bind() since all call-sites, where bpf_bind() is called,
already hold socket lock.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ce3e69e3c793..77afaf1ba556 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -150,6 +150,8 @@ enum bpf_attach_type {
 	BPF_SK_MSG_VERDICT,
 	BPF_CGROUP_INET4_BIND,
 	BPF_CGROUP_INET6_BIND,
+	BPF_CGROUP_INET4_CONNECT,
+	BPF_CGROUP_INET6_CONNECT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -744,6 +746,13 @@ union bpf_attr {
  *     @flags: reserved for future use
  *     Return: SK_PASS
  *
+ * int bpf_bind(ctx, addr, addr_len)
+ *     Bind socket to address. Only binding to IP is supported, no port can be
+ *     set in addr.
+ *     @ctx: pointer to context of type bpf_sock_addr
+ *     @addr: pointer to struct sockaddr to bind socket to
+ *     @addr_len: length of sockaddr structure
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -809,7 +818,8 @@ union bpf_attr {
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
-	FN(msg_pull_data),
+	FN(msg_pull_data),		\
+	FN(bind),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From aac3fc320d9404f2665a8b1249dc3170d5fa3caf Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:07 -0700
Subject: bpf: Post-hooks for sys_bind

"Post-hooks" are hooks that are called right before returning from
sys_bind. At this time IP and port are already allocated and no further
changes to `struct sock` can happen before returning from sys_bind but
BPF program has a chance to inspect the socket and change sys_bind
result.

Specifically it can e.g. inspect what port was allocated and if it
doesn't satisfy some policy, BPF program can force sys_bind to fail and
return EPERM to user.

Another example of usage is recording the IP:port pair to some map to
use it in later calls to sys_connect. E.g. if some TCP server inside
cgroup was bound to some IP:port_n, it can be recorded to a map. And
later when some TCP client inside same cgroup is trying to connect to
127.0.0.1:port_n, BPF hook for sys_connect can override the destination
and connect application to IP:port_n instead of 127.0.0.1:port_n. That
helps forcing all applications inside a cgroup to use desired IP and not
break those applications if they e.g. use localhost to communicate
between each other.

== Implementation details ==

Post-hooks are implemented as two new attach types
`BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND` for
existing prog type `BPF_PROG_TYPE_CGROUP_SOCK`.

Separate attach types for IPv4 and IPv6 are introduced to avoid access
to IPv6 field in `struct sock` from `inet_bind()` and to IPv4 field from
`inet6_bind()` since those fields might not make sense in such cases.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77afaf1ba556..c5ec89732a8d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -152,6 +152,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_BIND,
 	BPF_CGROUP_INET4_CONNECT,
 	BPF_CGROUP_INET6_CONNECT,
+	BPF_CGROUP_INET4_POST_BIND,
+	BPF_CGROUP_INET6_POST_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -948,6 +950,15 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
+	__u32 src_ip4;		/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_port;		/* Allows 4-byte read.
+				 * Stored in host byte order
+				 */
 };
 
 #define XDP_PACKET_HEADROOM 256
-- 
cgit v1.2.3


From 7a74d39cc2927302bc236397c1fdb1fe5be209ce Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:44 +0200
Subject: tipc: tipc: rename address types in user api

The three address type structs in the user API have names that in
reality reflect the specific, non-Linux environment where they were
originally created.

We now give them more intuitive names, in accordance with how TIPC is
described in the current documentation.

struct tipc_portid   -> struct tipc_socket_addr
struct tipc_name     -> struct tipc_service_addr
struct tipc_name_seq -> struct tipc_service_range

To avoid confusion, we also update some commmets and macro names to
 match the new terminology.

For compatibility, we add macros that map all old names to the new ones.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 57 +++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 4ac9f1f02b06..156224ac3d74 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -45,33 +45,33 @@
  * TIPC addressing primitives
  */
 
-struct tipc_portid {
+struct tipc_socket_addr {
 	__u32 ref;
 	__u32 node;
 };
 
-struct tipc_name {
+struct tipc_service_addr {
 	__u32 type;
 	__u32 instance;
 };
 
-struct tipc_name_seq {
+struct tipc_service_range {
 	__u32 type;
 	__u32 lower;
 	__u32 upper;
 };
 
 /*
- * Application-accessible port name types
+ * Application-accessible service types
  */
 
-#define TIPC_CFG_SRV		0	/* configuration service name type */
-#define TIPC_TOP_SRV		1	/* topology service name type */
-#define TIPC_LINK_STATE		2	/* link state name type */
-#define TIPC_RESERVED_TYPES	64	/* lowest user-publishable name type */
+#define TIPC_NODE_STATE		0	/* node state service type */
+#define TIPC_TOP_SRV		1	/* topology server service type */
+#define TIPC_LINK_STATE		2	/* link state service type */
+#define TIPC_RESERVED_TYPES	64	/* lowest user-allowed service type */
 
 /*
- * Publication scopes when binding port names and port name sequences
+ * Publication scopes when binding service / service range
  */
 enum tipc_scope {
 	TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
@@ -108,28 +108,28 @@ enum tipc_scope {
  * TIPC topology subscription service definitions
  */
 
-#define TIPC_SUB_PORTS		0x01	/* filter for port availability */
-#define TIPC_SUB_SERVICE	0x02	/* filter for service availability */
-#define TIPC_SUB_CANCEL		0x04	/* cancel a subscription */
+#define TIPC_SUB_PORTS          0x01    /* filter: evt at each match */
+#define TIPC_SUB_SERVICE        0x02    /* filter: evt at first up/last down */
+#define TIPC_SUB_CANCEL         0x04    /* filter: cancel a subscription */
 
 #define TIPC_WAIT_FOREVER	(~0)	/* timeout for permanent subscription */
 
 struct tipc_subscr {
-	struct tipc_name_seq seq;	/* name sequence of interest */
+	struct tipc_service_range seq;	/* range of interest */
 	__u32 timeout;			/* subscription duration (in ms) */
 	__u32 filter;			/* bitmask of filter options */
 	char usr_handle[8];		/* available for subscriber use */
 };
 
 #define TIPC_PUBLISHED		1	/* publication event */
-#define TIPC_WITHDRAWN		2	/* withdraw event */
+#define TIPC_WITHDRAWN		2	/* withdrawal event */
 #define TIPC_SUBSCR_TIMEOUT	3	/* subscription timeout event */
 
 struct tipc_event {
 	__u32 event;			/* event type */
-	__u32 found_lower;		/* matching name seq instances */
-	__u32 found_upper;		/*    "      "    "     "      */
-	struct tipc_portid port;	/* associated port */
+	__u32 found_lower;		/* matching range */
+	__u32 found_upper;		/*    "      "    */
+	struct tipc_socket_addr port;	/* associated socket */
 	struct tipc_subscr s;		/* associated subscription */
 };
 
@@ -149,20 +149,20 @@ struct tipc_event {
 #define SOL_TIPC	271
 #endif
 
-#define TIPC_ADDR_NAMESEQ	1
-#define TIPC_ADDR_MCAST		1
-#define TIPC_ADDR_NAME		2
-#define TIPC_ADDR_ID		3
+#define TIPC_ADDR_MCAST         1
+#define TIPC_SERVICE_RANGE      1
+#define TIPC_SERVICE_ADDR       2
+#define TIPC_SOCKET_ADDR        3
 
 struct sockaddr_tipc {
 	unsigned short family;
 	unsigned char  addrtype;
 	signed   char  scope;
 	union {
-		struct tipc_portid id;
-		struct tipc_name_seq nameseq;
+		struct tipc_socket_addr id;
+		struct tipc_service_range nameseq;
 		struct {
-			struct tipc_name name;
+			struct tipc_service_addr name;
 			__u32 domain;
 		} name;
 	} addr;
@@ -230,8 +230,13 @@ struct tipc_sioc_ln_req {
 /* The macros and functions below are deprecated:
  */
 
+#define TIPC_CFG_SRV		0
 #define TIPC_ZONE_SCOPE         1
 
+#define TIPC_ADDR_NAMESEQ	1
+#define TIPC_ADDR_NAME		2
+#define TIPC_ADDR_ID		3
+
 #define TIPC_NODE_BITS          12
 #define TIPC_CLUSTER_BITS       12
 #define TIPC_ZONE_BITS          8
@@ -250,6 +255,10 @@ struct tipc_sioc_ln_req {
 
 #define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
 
+#define tipc_portid tipc_socket_addr
+#define tipc_name tipc_service_addr
+#define tipc_name_seq tipc_service_range
+
 static inline __u32 tipc_addr(unsigned int zone,
 			      unsigned int cluster,
 			      unsigned int node)
-- 
cgit v1.2.3


From 7494cfa6d36d1556f17baa012dd93833620783db Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:45 +0200
Subject: tipc: avoid possible string overflow

gcc points out that the combined length of the fixed-length inputs to
l->name is larger than the destination buffer size:

net/tipc/link.c: In function 'tipc_link_create':
net/tipc/link.c:465:26: error: '%s' directive writing up to 32 bytes
into a region of size between 26 and 58 [-Werror=format-overflow=]
sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);

net/tipc/link.c:465:2: note: 'sprintf' output 11 or more bytes
(assuming 75) into a destination of size 60
sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);

A detailed analysis reveals that the theoretical maximum length of
a link name is:
max self_str + 1 + max if_name + 1 + max peer_str + 1 + max if_name =
16 + 1 + 15 + 1 + 16 + 1 + 15 = 65
Since we also need space for a trailing zero we now set MAX_LINK_NAME
to 68.

Just to be on the safe side we also replace the sprintf() call with
snprintf().

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
hash values")
Reported-by: Arnd Bergmann <arnd@arndb.de>

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 156224ac3d74..bf6d28677cfe 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -216,7 +216,7 @@ struct tipc_group_req {
 #define TIPC_MAX_MEDIA_NAME	16
 #define TIPC_MAX_IF_NAME	16
 #define TIPC_MAX_BEARER_NAME	32
-#define TIPC_MAX_LINK_NAME	60
+#define TIPC_MAX_LINK_NAME	68
 
 #define SIOCGETLINKNAME		SIOCPROTOPRIVATE
 
-- 
cgit v1.2.3