From 6545135a5ed2eac064f23bee3a19a81cfffbe573 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Tue, 20 Jun 2017 13:39:14 +0200
Subject: drm/qxl: fix __user annotations

Drop them from u64 fields, tag local variables correctly instead.
While being at it switch the code to use u64_to_user_ptr().

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Daniel Vetter <daniel@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620113916.6967-2-kraxel@redhat.com
---
 include/uapi/drm/qxl_drm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h
index 7eef42213051..880999d2d863 100644
--- a/include/uapi/drm/qxl_drm.h
+++ b/include/uapi/drm/qxl_drm.h
@@ -80,8 +80,8 @@ struct drm_qxl_reloc {
 };
 
 struct drm_qxl_command {
-	__u64	 __user command; /* void* */
-	__u64	 __user relocs; /* struct drm_qxl_reloc* */
+	__u64		command; /* void* */
+	__u64		relocs; /* struct drm_qxl_reloc* */
 	__u32		type;
 	__u32		command_size;
 	__u32		relocs_num;
@@ -91,7 +91,7 @@ struct drm_qxl_command {
 struct drm_qxl_execbuffer {
 	__u32		flags;		/* for future use */
 	__u32		commands_num;
-	__u64	 __user commands;	/* struct drm_qxl_command* */
+	__u64		commands;	/* struct drm_qxl_command* */
 };
 
 struct drm_qxl_update_area {
-- 
cgit v1.2.3


From 814abfabef3ceed390c10d06a0cc69a86454b6cf Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 17 Jul 2017 09:27:07 -0700
Subject: xdp: add bpf_redirect helper function

This adds support for a bpf_redirect helper function to the XDP
infrastructure. For now this only supports redirecting to the egress
path of a port.

In order to support drivers handling a xdp_buff natively this patches
uses a new ndo operation ndo_xdp_xmit() that takes pushes a xdp_buff
to the specified device.

If the program specifies either (a) an unknown device or (b) a device
that does not support the operation a BPF warning is thrown and the
XDP_ABORTED error code is returned.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e99e3e6f8b37..4dbb7a3f4677 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -717,6 +717,7 @@ enum xdp_action {
 	XDP_DROP,
 	XDP_PASS,
 	XDP_TX,
+	XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
-- 
cgit v1.2.3


From 546ac1ffb70d25b56c1126940e5ec639c4dd7413 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 17 Jul 2017 09:28:56 -0700
Subject: bpf: add devmap, a map for storing net device references

Device map (devmap) is a BPF map, primarily useful for networking
applications, that uses a key to lookup a reference to a netdevice.

The map provides a clean way for BPF programs to build virtual port
to physical port maps. Additionally, it provides a scoping function
for the redirect action itself allowing multiple optimizations. Future
patches will leverage the map to provide batching at the XDP layer.

Another optimization/feature, that is not yet implemented, would be
to support multiple netdevices per key to support efficient multicast
and broadcast support.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4dbb7a3f4677..ecbb0e7e15bc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -104,6 +104,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LPM_TRIE,
 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
 	BPF_MAP_TYPE_HASH_OF_MAPS,
+	BPF_MAP_TYPE_DEVMAP,
 };
 
 enum bpf_prog_type {
-- 
cgit v1.2.3


From 97f91a7cf04ff605845c20948b8a80e54cbd3376 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 17 Jul 2017 09:29:18 -0700
Subject: bpf: add bpf_redirect_map helper routine

BPF programs can use the devmap with a bpf_redirect_map() helper
routine to forward packets to netdevice in map.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ecbb0e7e15bc..1106a8c4cd36 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -348,6 +348,11 @@ union bpf_attr {
  *     @flags: bit 0 - if set, redirect to ingress instead of egress
  *             other bits - reserved
  *     Return: TC_ACT_REDIRECT
+ * int bpf_redirect_map(key, map, flags)
+ *     redirect to endpoint in map
+ *     @key: index in map to lookup
+ *     @map: fd of map to do lookup in
+ *     @flags: --
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -592,7 +597,8 @@ union bpf_attr {
 	FN(get_socket_uid),		\
 	FN(set_hash),			\
 	FN(setsockopt),			\
-	FN(skb_adjust_room),
+	FN(skb_adjust_room),		\
+	FN(redirect_map),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From fc60a8b675bd9499c71716d21c238eed5092ddfc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?= <afaerber@suse.de>
Date: Sun, 9 Jul 2017 22:29:42 +0200
Subject: tty: serial: owl: Implement console driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement serial console driver to complement earlycon.

Based on LeMaker linux-actions tree.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index c34a2a3eeff5..38bea3217ead 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -70,6 +70,7 @@
 #define PORT_CLPS711X	33
 #define PORT_SA1100	34
 #define PORT_UART00	35
+#define PORT_OWL	36
 #define PORT_21285	37
 
 /* Sparc type numbers.  */
-- 
cgit v1.2.3


From eb0baf8a0d9259d168523b8e7c436b55ade7c546 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Tue, 18 Jul 2017 20:13:09 +0800
Subject: perf/core: Define the common branch type classification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is often useful to know the branch types while analyzing branch data.
For example, a call is very different from a conditional branch.

Currently we have to look it up in binary while the binary may later not
be available and even the binary is available but user has to take some
time. It is very useful for user to check it directly in perf report.

Perf already has support for disassembling the branch instruction to get
the x86 branch type.

To keep consistent on kernel and userspace and make the classification
more common, the patch adds the common branch type classification
in perf_event.h.

The patch only defines a minimum but most common set of branch types.

PERF_BR_UNKNOWN         : unknown
PERF_BR_COND            ：conditional
PERF_BR_UNCOND          : unconditional
PERF_BR_IND             : indirect
PERF_BR_CALL            : function call
PERF_BR_IND_CALL        : indirect function call
PERF_BR_RET             : function return
PERF_BR_SYSCALL         : syscall
PERF_BR_SYSRET          : syscall return
PERF_BR_COND_CALL       : conditional function call
PERF_BR_COND_RET        : conditional function return

The patch also adds a new field type (4 bits) in perf_branch_entry
to record the branch type.

Since the disassembling of branch instruction needs some overhead,
a new PERF_SAMPLE_BRANCH_TYPE_SAVE is introduced to indicate if it
needs to disassemble the branch instruction and record the branch
type.

Change log:

v10: Not changed.

v9: Not changed.

v8: Change PERF_BR_NONE to PERF_BR_UNKNOWN.
    No other change.

v7: Just keep the most common branch types.
    Others are removed.

v6: Not changed.

v5: Not changed. The v5 patch series just change the userspace.

v4: Comparing to previous version, the major changes are:

1. Remove the PERF_BR_JCC_FWD/PERF_BR_JCC_BWD, they will be
   computed later in userspace.

2. Remove the "cross" field in perf_branch_entry. The cross page
   computing will be done later in userspace.

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/r/1500379995-6449-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b1c0b187acfe..642db5fa3286 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT	= 14, /* no flags */
 	PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT	= 15, /* no cycles */
 
+	PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT	= 16, /* save branch type */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -198,9 +200,30 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_NO_FLAGS	= 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
 	PERF_SAMPLE_BRANCH_NO_CYCLES	= 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
 
+	PERF_SAMPLE_BRANCH_TYPE_SAVE	=
+		1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
+/*
+ * Common flow change classification
+ */
+enum {
+	PERF_BR_UNKNOWN		= 0,	/* unknown */
+	PERF_BR_COND		= 1,	/* conditional */
+	PERF_BR_UNCOND		= 2,	/* unconditional  */
+	PERF_BR_IND		= 3,	/* indirect */
+	PERF_BR_CALL		= 4,	/* function call */
+	PERF_BR_IND_CALL	= 5,	/* indirect function call */
+	PERF_BR_RET		= 6,	/* function return */
+	PERF_BR_SYSCALL		= 7,	/* syscall */
+	PERF_BR_SYSRET		= 8,	/* syscall return */
+	PERF_BR_COND_CALL	= 9,	/* conditional function call */
+	PERF_BR_COND_RET	= 10,	/* conditional function return */
+	PERF_BR_MAX,
+};
+
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
 	(PERF_SAMPLE_BRANCH_USER|\
 	 PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1015,6 +1038,7 @@ union perf_mem_data_src {
  *     in_tx: running in a hardware transaction
  *     abort: aborting a hardware transaction
  *    cycles: cycles from last branch (or 0 if not supported)
+ *      type: branch type
  */
 struct perf_branch_entry {
 	__u64	from;
@@ -1024,7 +1048,8 @@ struct perf_branch_entry {
 		in_tx:1,    /* in transaction */
 		abort:1,    /* transaction abort */
 		cycles:16,  /* cycle count to last branch */
-		reserved:44;
+		type:4,     /* branch type */
+		reserved:40;
 };
 
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
-- 
cgit v1.2.3


From 727f8914477e4642c7d1ff381667cdc4178b40c6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 21 Jul 2017 10:39:26 +0100
Subject: rxrpc: Expose UAPI definitions to userspace

Move UAPI definitions from the internal header and place them in a UAPI
header file so that userspace can make use of them.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/uapi/linux/rxrpc.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 include/uapi/linux/rxrpc.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h
new file mode 100644
index 000000000000..08e2fb9c70ae
--- /dev/null
+++ b/include/uapi/linux/rxrpc.h
@@ -0,0 +1,80 @@
+/* Types and definitions for AF_RXRPC.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_RXRPC_H
+#define _UAPI_LINUX_RXRPC_H
+
+#include <linux/types.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+/*
+ * RxRPC socket address
+ */
+struct sockaddr_rxrpc {
+	sa_family_t	srx_family;	/* address family */
+	u16		srx_service;	/* service desired */
+	u16		transport_type;	/* type of transport socket (SOCK_DGRAM) */
+	u16		transport_len;	/* length of transport address */
+	union {
+		sa_family_t family;		/* transport address family */
+		struct sockaddr_in sin;		/* IPv4 transport address */
+		struct sockaddr_in6 sin6;	/* IPv6 transport address */
+	} transport;
+};
+
+/*
+ * RxRPC socket options
+ */
+#define RXRPC_SECURITY_KEY		1	/* [clnt] set client security key */
+#define RXRPC_SECURITY_KEYRING		2	/* [srvr] set ring of server security keys */
+#define RXRPC_EXCLUSIVE_CONNECTION	3	/* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */
+#define RXRPC_MIN_SECURITY_LEVEL	4	/* minimum security level */
+#define RXRPC_UPGRADEABLE_SERVICE	5	/* Upgrade service[0] -> service[1] */
+#define RXRPC_SUPPORTED_CMSG		6	/* Get highest supported control message type */
+
+/*
+ * RxRPC control messages
+ * - If neither abort or accept are specified, the message is a data message.
+ * - terminal messages mean that a user call ID tag can be recycled
+ * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg()
+ */
+enum rxrpc_cmsg_type {
+	RXRPC_USER_CALL_ID	= 1,	/* sr: user call ID specifier */
+	RXRPC_ABORT		= 2,	/* sr: abort request / notification [terminal] */
+	RXRPC_ACK		= 3,	/* -r: [Service] RPC op final ACK received [terminal] */
+	RXRPC_NET_ERROR		= 5,	/* -r: network error received [terminal] */
+	RXRPC_BUSY		= 6,	/* -r: server busy received [terminal] */
+	RXRPC_LOCAL_ERROR	= 7,	/* -r: local error generated [terminal] */
+	RXRPC_NEW_CALL		= 8,	/* -r: [Service] new incoming call notification */
+	RXRPC_ACCEPT		= 9,	/* s-: [Service] accept request */
+	RXRPC_EXCLUSIVE_CALL	= 10,	/* s-: Call should be on exclusive connection */
+	RXRPC_UPGRADE_SERVICE	= 11,	/* s-: Request service upgrade for client call */
+	RXRPC_TX_LENGTH		= 12,	/* s-: Total length of Tx data */
+	RXRPC__SUPPORTED
+};
+
+/*
+ * RxRPC security levels
+ */
+#define RXRPC_SECURITY_PLAIN	0	/* plain secure-checksummed packets only */
+#define RXRPC_SECURITY_AUTH	1	/* authenticated packets */
+#define RXRPC_SECURITY_ENCRYPT	2	/* encrypted packets */
+
+/*
+ * RxRPC security indices
+ */
+#define RXRPC_SECURITY_NONE	0	/* no security protocol */
+#define RXRPC_SECURITY_RXKAD	2	/* kaserver or kerberos 4 */
+#define RXRPC_SECURITY_RXGK	4	/* gssapi-based */
+#define RXRPC_SECURITY_RXK5	5	/* kerberos 5 */
+
+#endif /* _UAPI_LINUX_RXRPC_H */
-- 
cgit v1.2.3


From ddc6c70f07bb1f6dd39a2c6c430f7b4fa95199c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 21 Jul 2017 10:07:10 +0100
Subject: rxrpc: Move the packet.h include file into net/rxrpc/

Move the protocol description header file into net/rxrpc/ and rename it to
protocol.h.  It's no longer necessary to expose it as packets are no longer
exposed to kernel services (such as AFS) that use the facility.

The abort codes are transferred to the UAPI header instead as we pass these
back to userspace and also to kernel services.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/uapi/linux/rxrpc.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h
index 08e2fb9c70ae..9656aad8f8f7 100644
--- a/include/uapi/linux/rxrpc.h
+++ b/include/uapi/linux/rxrpc.h
@@ -77,4 +77,48 @@ enum rxrpc_cmsg_type {
 #define RXRPC_SECURITY_RXGK	4	/* gssapi-based */
 #define RXRPC_SECURITY_RXK5	5	/* kerberos 5 */
 
+/*
+ * RxRPC-level abort codes
+ */
+#define RX_CALL_DEAD		-1	/* call/conn has been inactive and is shut down */
+#define RX_INVALID_OPERATION	-2	/* invalid operation requested / attempted */
+#define RX_CALL_TIMEOUT		-3	/* call timeout exceeded */
+#define RX_EOF			-4	/* unexpected end of data on read op */
+#define RX_PROTOCOL_ERROR	-5	/* low-level protocol error */
+#define RX_USER_ABORT		-6	/* generic user abort */
+#define RX_ADDRINUSE		-7	/* UDP port in use */
+#define RX_DEBUGI_BADTYPE	-8	/* bad debugging packet type */
+
+/*
+ * (un)marshalling abort codes (rxgen)
+ */
+#define RXGEN_CC_MARSHAL	-450
+#define RXGEN_CC_UNMARSHAL	-451
+#define RXGEN_SS_MARSHAL	-452
+#define RXGEN_SS_UNMARSHAL	-453
+#define RXGEN_DECODE		-454
+#define RXGEN_OPCODE		-455
+#define RXGEN_SS_XDRFREE	-456
+#define RXGEN_CC_XDRFREE	-457
+
+/*
+ * Rx kerberos security abort codes
+ * - unfortunately we have no generalised security abort codes to say things
+ *   like "unsupported security", so we have to use these instead and hope the
+ *   other side understands
+ */
+#define RXKADINCONSISTENCY	19270400	/* security module structure inconsistent */
+#define RXKADPACKETSHORT	19270401	/* packet too short for security challenge */
+#define RXKADLEVELFAIL		19270402	/* security level negotiation failed */
+#define RXKADTICKETLEN		19270403	/* ticket length too short or too long */
+#define RXKADOUTOFSEQUENCE	19270404	/* packet had bad sequence number */
+#define RXKADNOAUTH		19270405	/* caller not authorised */
+#define RXKADBADKEY		19270406	/* illegal key: bad parity or weak */
+#define RXKADBADTICKET		19270407	/* security object was passed a bad ticket */
+#define RXKADUNKNOWNKEY		19270408	/* ticket contained unknown key version number */
+#define RXKADEXPIRED		19270409	/* authentication expired */
+#define RXKADSEALEDINCON	19270410	/* sealed data inconsistent */
+#define RXKADDATALEN		19270411	/* user data too long */
+#define RXKADILLEGALLEVEL	19270412	/* caller not authorised to use encrypted conns */
+
 #endif /* _UAPI_LINUX_RXRPC_H */
-- 
cgit v1.2.3


From 784b4e612d42a2b7578d7fab2ed78940e10536bc Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 19 Jul 2017 16:32:23 +0200
Subject: netfilter: nf_tables: Attach process info to NFT_MSG_NEWGEN
 notifications

This is helpful for 'nft monitor' to track which process caused a given
change to the ruleset.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 683f6f88fcac..6f0a950e21c3 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1221,6 +1221,8 @@ enum nft_objref_attributes {
 enum nft_gen_attributes {
 	NFTA_GEN_UNSPEC,
 	NFTA_GEN_ID,
+	NFTA_GEN_PROC_PID,
+	NFTA_GEN_PROC_NAME,
 	__NFTA_GEN_MAX
 };
 #define NFTA_GEN_MAX		(__NFTA_GEN_MAX - 1)
-- 
cgit v1.2.3


From 2dee0e545894c23b1a2cc2019ac87dffb42e5984 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 8 Jun 2017 16:15:07 +0300
Subject: IB/uverbs: Enable QP creation with a given source QP number

Enable QP creation with a given source QP number, the created QP will
use the source QPN as its wire QP number.

To create such a QP, root privileges (i.e. CAP_NET_RAW) are required
from the user application.

This comes as a pre-patch for downstream patches in this series to
allow user space applications to accelerate traffic which is typically
handled by IPoIB ULP.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_verbs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 270c350bedc6..63656d2e8705 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -578,7 +578,7 @@ struct ib_uverbs_ex_create_qp {
 	__u32 comp_mask;
 	__u32 create_flags;
 	__u32 rwq_ind_tbl_handle;
-	__u32  reserved1;
+	__u32  source_qpn;
 };
 
 struct ib_uverbs_open_qp {
-- 
cgit v1.2.3


From ea30b966f7dd6bcfb20c98a7f99608c7bb10bfac Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Wed, 21 Jun 2017 09:26:28 +0300
Subject: IB/mlx4: Add inline-receive support

When inline-receive is enabled, the HCA may write received
data into the receive WQE.

Inline-receive is enabled by setting its matching bit in
the QP context and each single-packet message with payload
not exceeding the receive WQE size will be delivered to
the WQE.

The completion report will indicate that the payload was placed to the WQE.

It includes:
1) Return maximum supported size of inline-receive by the hardware
in query_device vendor's data part.
2) Enable the feature when requested by the vendor data input.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index af431752655c..bf3bdba2f326 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -101,7 +101,8 @@ struct mlx4_ib_create_qp {
 	__u8	log_sq_bb_count;
 	__u8	log_sq_stride;
 	__u8	sq_no_prefetch;
-	__u8	reserved[5];
+	__u32	inl_recv_sz;
+	__u8	reserved;
 };
 
 #endif /* MLX4_ABI_USER_H */
-- 
cgit v1.2.3


From 400b1ebcfe31279895f1baa8ecaa390d9a4a0eef Mon Sep 17 00:00:00 2001
From: Guy Levi <guyle@mellanox.com>
Date: Tue, 4 Jul 2017 16:24:24 +0300
Subject: IB/mlx4: Add support for WQ related verbs

Support create/modify/destroy WQ related verbs.

The base IB object to enable RSS functionality is a WQ (i.e. ib_wq).
This patch implements the related WQ verbs as of create, modify and
destroy.

In downstream patches the WQ will be used as part of an indirection
table (i.e. ib_rwq_ind_table) to enable RSS QP creation.

Notes:
ConnectX-3 hardware requires consecutive WQNs list as receive descriptor
queues for the RSS QP. Hence, the driver manages consecutive ranges lists
per context which the user must respect.
Destroying the WQ does not return its WQN back to its range for
reusing. However, destroying all WQs from the same range releases the
range and in turn releases its WQNs for reusing.

Since the WQ object is not a natural object in the hardware, the driver
implements the WQ by the hardware QP.

As such, the WQ inherits its port from its RSS QP parent upon its
RST->INIT transition and by that time its state is applied to the
hardware.

Signed-off-by: Guy Levi <guyle@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index bf3bdba2f326..c9702a5f0bda 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -105,4 +105,18 @@ struct mlx4_ib_create_qp {
 	__u8	reserved;
 };
 
+struct mlx4_ib_create_wq {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u8	log_range_size;
+	__u8	reserved[3];
+	__u32   comp_mask;
+	__u32   reserved1;
+};
+
+struct mlx4_ib_modify_wq {
+	__u32	comp_mask;
+	__u32	reserved;
+};
+
 #endif /* MLX4_ABI_USER_H */
-- 
cgit v1.2.3


From b8d46ca035060e70f5f0da849d86720752d5aa17 Mon Sep 17 00:00:00 2001
From: Guy Levi <guyle@mellanox.com>
Date: Tue, 4 Jul 2017 16:24:25 +0300
Subject: IB/mlx4: Add support for WQ indirection table related verbs

To enable RSS functionality the IB indirection table object (i.e.
ib_rwq_ind_table) should be used.
This patch implements the related verbs as of create and destroy an
indirection table.

In downstream patches the indirection table will be used as part of RSS
QP creation.

Signed-off-by: Guy Levi <guyle@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index c9702a5f0bda..5591d955ba00 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -119,4 +119,8 @@ struct mlx4_ib_modify_wq {
 	__u32	reserved;
 };
 
+struct mlx4_ib_create_rwq_ind_tbl_resp {
+	__u32	response_length;
+	__u32	reserved;
+};
 #endif /* MLX4_ABI_USER_H */
-- 
cgit v1.2.3


From 3078f5f1bd8b6c8aef77b8ef4d49671fa6eb058e Mon Sep 17 00:00:00 2001
From: Guy Levi <guyle@mellanox.com>
Date: Tue, 4 Jul 2017 16:24:26 +0300
Subject: IB/mlx4: Add support for RSS QP

Add support to work with a RSS QP by using an indirection table object
upon QP creation. Other related QP verbs (e.g. modify/destroy/query) were
updated as well for that QP mode.

Notes:
- The RX hash properties are supplied as driver private data.
- The RSS QP port is used on the associated WQs in its indirection
  table. Applying different ports during WQ life time is not allowed.
- The expected RSS QP flow is: create, modify(RST->INIT),
  modify(RST->RTR), destroy.

Signed-off-by: Guy Levi <guyle@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 5591d955ba00..d915cab37ec3 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -95,6 +95,16 @@ struct mlx4_ib_create_srq_resp {
 	__u32	reserved;
 };
 
+struct mlx4_ib_create_qp_rss {
+	__u64   rx_hash_fields_mask;
+	__u8    rx_hash_function;
+	__u8    rx_key_len;
+	__u8    reserved[6];
+	__u8    rx_hash_key[40];
+	__u32   comp_mask;
+	__u32   reserved1;
+};
+
 struct mlx4_ib_create_qp {
 	__u64	buf_addr;
 	__u64	db_addr;
@@ -123,4 +133,27 @@ struct mlx4_ib_create_rwq_ind_tbl_resp {
 	__u32	response_length;
 	__u32	reserved;
 };
+
+/* RX Hash function flags */
+enum mlx4_ib_rx_hash_function_flags {
+	MLX4_IB_RX_HASH_FUNC_TOEPLITZ	= 1 << 0,
+};
+
+/*
+ * RX Hash flags, these flags allows to set which incoming packet's field should
+ * participates in RX Hash. Each flag represent certain packet's field,
+ * when the flag is set the field that is represented by the flag will
+ * participate in RX Hash calculation.
+ */
+enum mlx4_ib_rx_hash_fields {
+	MLX4_IB_RX_HASH_SRC_IPV4	= 1 << 0,
+	MLX4_IB_RX_HASH_DST_IPV4	= 1 << 1,
+	MLX4_IB_RX_HASH_SRC_IPV6	= 1 << 2,
+	MLX4_IB_RX_HASH_DST_IPV6	= 1 << 3,
+	MLX4_IB_RX_HASH_SRC_PORT_TCP	= 1 << 4,
+	MLX4_IB_RX_HASH_DST_PORT_TCP	= 1 << 5,
+	MLX4_IB_RX_HASH_SRC_PORT_UDP	= 1 << 6,
+	MLX4_IB_RX_HASH_DST_PORT_UDP	= 1 << 7
+};
+
 #endif /* MLX4_ABI_USER_H */
-- 
cgit v1.2.3


From d08477aa975e97f1dc64c0ae59cebf98520456ce Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 29 Jun 2017 09:28:50 -0500
Subject: fcntl: Don't use ambiguous SIG_POLL si_codes

We have a weird and problematic intersection of features that when
they all come together result in ambiguous siginfo values, that
we can not support properly.

- Supporting fcntl(F_SETSIG,...) with arbitrary valid signals.

- Using positive values for POLL_IN, POLL_OUT, POLL_MSG, ..., etc
  that imply they are signal specific si_codes and using the
  aforementioned arbitrary signal to deliver them.

- Supporting injection of arbitrary siginfo values for debugging and
  checkpoint/restore.

The result is that just looking at siginfo si_codes of 1 to 6 are
ambigious.  It could either be a signal specific si_code or it could
be a generic si_code.

For most of the kernel this is a non-issue but for sending signals
with siginfo it is impossible to play back the kernel signals and
get the same result.

Strictly speaking when the si_code was changed from SI_SIGIO to
POLL_IN and friends between 2.2 and 2.4 this functionality was not
ambiguous, as only real time signals were supported.  Before 2.4 was
released the kernel began supporting siginfo with non realtime signals
so they could give details of why the signal was sent.

The result is that if F_SETSIG is set to one of the signals with signal
specific si_codes then user space can not know why the signal was sent.

I grepped through a bunch of userspace programs using debian code
search to get a feel for how often people choose a signal that results
in an ambiguous si_code.  I only found one program doing so and it was
using SIGCHLD to test the F_SETSIG functionality, and did not appear
to be a real world usage.

Therefore the ambiguity does not appears to be a real world problem in
practice.  Remove the ambiguity while introducing the smallest chance
of breakage by changing the si_code to SI_SIGIO when signals with
signal specific si_codes are targeted.

Fixes: v2.3.40 -- Added support for queueing non-rt signals
Fixes: v2.3.21 -- Changed the si_code from SI_SIGIO
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/uapi/asm-generic/siginfo.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 9c4eca6b374a..9e956ea94d57 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -184,7 +184,7 @@ typedef struct siginfo {
 #define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
 #define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */
 #define SI_ASYNCIO	-4		/* sent by AIO completion */
-#define SI_SIGIO	-5		/* sent by queued SIGIO */
+#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */
 #define SI_TKILL	-6		/* sent by tkill system call */
 #define SI_DETHREAD	-7		/* sent by execve() killing subsidiary threads */
 
@@ -259,7 +259,7 @@ typedef struct siginfo {
 #define NSIGCHLD	6
 
 /*
- * SIGPOLL si_codes
+ * SIGPOLL (or any other signal without signal specific si_codes) si_codes
  */
 #define POLL_IN		(__SI_POLL|1)	/* data input available */
 #define POLL_OUT	(__SI_POLL|2)	/* output buffers available */
-- 
cgit v1.2.3


From cc731525f26af85a1c3537da41e0abd1d35e0bdb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Jul 2017 22:36:59 -0500
Subject: signal: Remove kernel interal si_code magic

struct siginfo is a union and the kernel since 2.4 has been hiding a union
tag in the high 16bits of si_code using the values:
__SI_KILL
__SI_TIMER
__SI_POLL
__SI_FAULT
__SI_CHLD
__SI_RT
__SI_MESGQ
__SI_SYS

While this looks plausible on the surface, in practice this situation has
not worked well.

- Injected positive signals are not copied to user space properly
  unless they have these magic high bits set.

- Injected positive signals are not reported properly by signalfd
  unless they have these magic high bits set.

- These kernel internal values leaked to userspace via ptrace_peek_siginfo

- It was possible to inject these kernel internal values and cause the
  the kernel to misbehave.

- Kernel developers got confused and expected these kernel internal values
  in userspace in kernel self tests.

- Kernel developers got confused and set si_code to __SI_FAULT which
  is SI_USER in userspace which causes userspace to think an ordinary user
  sent the signal and that it was not kernel generated.

- The values make it impossible to reorganize the code to transform
  siginfo_copy_to_user into a plain copy_to_user.  As si_code must
  be massaged before being passed to userspace.

So remove these kernel internal si codes and make the kernel code simpler
and more maintainable.

To replace these kernel internal magic si_codes introduce the helper
function siginfo_layout, that takes a signal number and an si_code and
computes which union member of siginfo is being used.  Have
siginfo_layout return an enumeration so that gcc will have enough
information to warn if a switch statement does not handle all of union
members.

A couple of architectures have a messed up ABI that defines signal
specific duplications of SI_USER which causes more special cases in
siginfo_layout than I would like.  The good news is only problem
architectures pay the cost.

Update all of the code that used the previous magic __SI_ values to
use the new SIL_ values and to call siginfo_layout to get those
values.  Escept where not all of the cases are handled remove the
defaults in the switch statements so that if a new case is missed in
the future the lack will show up at compile time.

Modify the code that copies siginfo si_code to userspace to just copy
the value and not cast si_code to a short first.  The high bits are no
longer used to hold a magic union member.

Fixup the siginfo header files to stop including the __SI_ values in
their constants and for the headers that were missing it to properly
update the number of si_codes for each signal type.

The fixes to copy_siginfo_from_user32 implementations has the
interesting property that several of them perviously should never have
worked as the __SI_ values they depended up where kernel internal.
With that dependency gone those implementations should work much
better.

The idea of not passing the __SI_ values out to userspace and then
not reinserting them has been tested with criu and criu worked without
changes.

Ref: 2.4.0-test1
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/uapi/asm-generic/siginfo.h | 115 +++++++++++++++----------------------
 1 file changed, 46 insertions(+), 69 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 9e956ea94d57..e5aa6794cea4 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -151,29 +151,6 @@ typedef struct siginfo {
 #define si_arch		_sifields._sigsys._arch
 #endif
 
-#ifdef __KERNEL__
-#define __SI_MASK	0xffff0000u
-#define __SI_KILL	(0 << 16)
-#define __SI_TIMER	(1 << 16)
-#define __SI_POLL	(2 << 16)
-#define __SI_FAULT	(3 << 16)
-#define __SI_CHLD	(4 << 16)
-#define __SI_RT		(5 << 16)
-#define __SI_MESGQ	(6 << 16)
-#define __SI_SYS	(7 << 16)
-#define __SI_CODE(T,N)	((T) | ((N) & 0xffff))
-#else /* __KERNEL__ */
-#define __SI_KILL	0
-#define __SI_TIMER	0
-#define __SI_POLL	0
-#define __SI_FAULT	0
-#define __SI_CHLD	0
-#define __SI_RT		0
-#define __SI_MESGQ	0
-#define __SI_SYS	0
-#define __SI_CODE(T,N)	(N)
-#endif /* __KERNEL__ */
-
 /*
  * si_code values
  * Digital reserves positive values for kernel-generated signals.
@@ -181,10 +158,10 @@ typedef struct siginfo {
 #define SI_USER		0		/* sent by kill, sigsend, raise */
 #define SI_KERNEL	0x80		/* sent by the kernel from somewhere */
 #define SI_QUEUE	-1		/* sent by sigqueue */
-#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
-#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */
+#define SI_TIMER	-2		/* sent by timer expiration */
+#define SI_MESGQ	-3		/* sent by real time mesq state change */
 #define SI_ASYNCIO	-4		/* sent by AIO completion */
-#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */
+#define SI_SIGIO	-5		/* sent by queued SIGIO */
 #define SI_TKILL	-6		/* sent by tkill system call */
 #define SI_DETHREAD	-7		/* sent by execve() killing subsidiary threads */
 
@@ -194,86 +171,86 @@ typedef struct siginfo {
 /*
  * SIGILL si_codes
  */
-#define ILL_ILLOPC	(__SI_FAULT|1)	/* illegal opcode */
-#define ILL_ILLOPN	(__SI_FAULT|2)	/* illegal operand */
-#define ILL_ILLADR	(__SI_FAULT|3)	/* illegal addressing mode */
-#define ILL_ILLTRP	(__SI_FAULT|4)	/* illegal trap */
-#define ILL_PRVOPC	(__SI_FAULT|5)	/* privileged opcode */
-#define ILL_PRVREG	(__SI_FAULT|6)	/* privileged register */
-#define ILL_COPROC	(__SI_FAULT|7)	/* coprocessor error */
-#define ILL_BADSTK	(__SI_FAULT|8)	/* internal stack error */
+#define ILL_ILLOPC	1	/* illegal opcode */
+#define ILL_ILLOPN	2	/* illegal operand */
+#define ILL_ILLADR	3	/* illegal addressing mode */
+#define ILL_ILLTRP	4	/* illegal trap */
+#define ILL_PRVOPC	5	/* privileged opcode */
+#define ILL_PRVREG	6	/* privileged register */
+#define ILL_COPROC	7	/* coprocessor error */
+#define ILL_BADSTK	8	/* internal stack error */
 #define NSIGILL		8
 
 /*
  * SIGFPE si_codes
  */
-#define FPE_INTDIV	(__SI_FAULT|1)	/* integer divide by zero */
-#define FPE_INTOVF	(__SI_FAULT|2)	/* integer overflow */
-#define FPE_FLTDIV	(__SI_FAULT|3)	/* floating point divide by zero */
-#define FPE_FLTOVF	(__SI_FAULT|4)	/* floating point overflow */
-#define FPE_FLTUND	(__SI_FAULT|5)	/* floating point underflow */
-#define FPE_FLTRES	(__SI_FAULT|6)	/* floating point inexact result */
-#define FPE_FLTINV	(__SI_FAULT|7)	/* floating point invalid operation */
-#define FPE_FLTSUB	(__SI_FAULT|8)	/* subscript out of range */
+#define FPE_INTDIV	1	/* integer divide by zero */
+#define FPE_INTOVF	2	/* integer overflow */
+#define FPE_FLTDIV	3	/* floating point divide by zero */
+#define FPE_FLTOVF	4	/* floating point overflow */
+#define FPE_FLTUND	5	/* floating point underflow */
+#define FPE_FLTRES	6	/* floating point inexact result */
+#define FPE_FLTINV	7	/* floating point invalid operation */
+#define FPE_FLTSUB	8	/* subscript out of range */
 #define NSIGFPE		8
 
 /*
  * SIGSEGV si_codes
  */
-#define SEGV_MAPERR	(__SI_FAULT|1)	/* address not mapped to object */
-#define SEGV_ACCERR	(__SI_FAULT|2)	/* invalid permissions for mapped object */
-#define SEGV_BNDERR	(__SI_FAULT|3)  /* failed address bound checks */
-#define SEGV_PKUERR	(__SI_FAULT|4)  /* failed protection key checks */
+#define SEGV_MAPERR	1	/* address not mapped to object */
+#define SEGV_ACCERR	2	/* invalid permissions for mapped object */
+#define SEGV_BNDERR	3	/* failed address bound checks */
+#define SEGV_PKUERR	4	/* failed protection key checks */
 #define NSIGSEGV	4
 
 /*
  * SIGBUS si_codes
  */
-#define BUS_ADRALN	(__SI_FAULT|1)	/* invalid address alignment */
-#define BUS_ADRERR	(__SI_FAULT|2)	/* non-existent physical address */
-#define BUS_OBJERR	(__SI_FAULT|3)	/* object specific hardware error */
+#define BUS_ADRALN	1	/* invalid address alignment */
+#define BUS_ADRERR	2	/* non-existent physical address */
+#define BUS_OBJERR	3	/* object specific hardware error */
 /* hardware memory error consumed on a machine check: action required */
-#define BUS_MCEERR_AR	(__SI_FAULT|4)
+#define BUS_MCEERR_AR	4
 /* hardware memory error detected in process but not consumed: action optional*/
-#define BUS_MCEERR_AO	(__SI_FAULT|5)
+#define BUS_MCEERR_AO	5
 #define NSIGBUS		5
 
 /*
  * SIGTRAP si_codes
  */
-#define TRAP_BRKPT	(__SI_FAULT|1)	/* process breakpoint */
-#define TRAP_TRACE	(__SI_FAULT|2)	/* process trace trap */
-#define TRAP_BRANCH     (__SI_FAULT|3)  /* process taken branch trap */
-#define TRAP_HWBKPT     (__SI_FAULT|4)  /* hardware breakpoint/watchpoint */
+#define TRAP_BRKPT	1	/* process breakpoint */
+#define TRAP_TRACE	2	/* process trace trap */
+#define TRAP_BRANCH     3	/* process taken branch trap */
+#define TRAP_HWBKPT     4	/* hardware breakpoint/watchpoint */
 #define NSIGTRAP	4
 
 /*
  * SIGCHLD si_codes
  */
-#define CLD_EXITED	(__SI_CHLD|1)	/* child has exited */
-#define CLD_KILLED	(__SI_CHLD|2)	/* child was killed */
-#define CLD_DUMPED	(__SI_CHLD|3)	/* child terminated abnormally */
-#define CLD_TRAPPED	(__SI_CHLD|4)	/* traced child has trapped */
-#define CLD_STOPPED	(__SI_CHLD|5)	/* child has stopped */
-#define CLD_CONTINUED	(__SI_CHLD|6)	/* stopped child has continued */
+#define CLD_EXITED	1	/* child has exited */
+#define CLD_KILLED	2	/* child was killed */
+#define CLD_DUMPED	3	/* child terminated abnormally */
+#define CLD_TRAPPED	4	/* traced child has trapped */
+#define CLD_STOPPED	5	/* child has stopped */
+#define CLD_CONTINUED	6	/* stopped child has continued */
 #define NSIGCHLD	6
 
 /*
  * SIGPOLL (or any other signal without signal specific si_codes) si_codes
  */
-#define POLL_IN		(__SI_POLL|1)	/* data input available */
-#define POLL_OUT	(__SI_POLL|2)	/* output buffers available */
-#define POLL_MSG	(__SI_POLL|3)	/* input message available */
-#define POLL_ERR	(__SI_POLL|4)	/* i/o error */
-#define POLL_PRI	(__SI_POLL|5)	/* high priority input available */
-#define POLL_HUP	(__SI_POLL|6)	/* device disconnected */
+#define POLL_IN		1	/* data input available */
+#define POLL_OUT	2	/* output buffers available */
+#define POLL_MSG	3	/* input message available */
+#define POLL_ERR	4	/* i/o error */
+#define POLL_PRI	5	/* high priority input available */
+#define POLL_HUP	6	/* device disconnected */
 #define NSIGPOLL	6
 
 /*
  * SIGSYS si_codes
  */
-#define SYS_SECCOMP		(__SI_SYS|1)	/* seccomp triggered */
-#define NSIGSYS	1
+#define SYS_SECCOMP	1	/* seccomp triggered */
+#define NSIGSYS		1
 
 /*
  * sigevent definitions
-- 
cgit v1.2.3


From ad84dad2160d5f36bb471b391462d651c887d693 Mon Sep 17 00:00:00 2001
From: "Amrani, Ram" <Ram.Amrani@cavium.com>
Date: Mon, 26 Jun 2017 19:05:05 +0300
Subject: RDMA/qedr: notify user application if DPM is supported

Direct Packet Mode support may be disabled, e.g, due to limited
resources. Notifying the user application prevents wasting cycles
on attempting to send these kind of packets.

Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/qedr-abi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 75c270d839c8..2684004ec4fd 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -49,6 +49,7 @@ struct qedr_alloc_ucontext_resp {
 	__u32 sges_per_recv_wr;
 	__u32 sges_per_srq_wr;
 	__u32 max_cqes;
+	__u8 dpm_enabled;
 };
 
 struct qedr_alloc_pd_ureq {
-- 
cgit v1.2.3


From 67cbe3532c2cd84303a2073cedad6b8bcad13be3 Mon Sep 17 00:00:00 2001
From: "Amrani, Ram" <Ram.Amrani@cavium.com>
Date: Mon, 26 Jun 2017 19:05:06 +0300
Subject: RDMA/qedr: notify user application of supported WIDs

The number of supported WIDs, if they are supported at all, can be
limited due to resources. Notifying the user space application the
number of available WIDs allows it to utilize them correctly.

Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/qedr-abi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 2684004ec4fd..54b64357ab24 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -50,6 +50,8 @@ struct qedr_alloc_ucontext_resp {
 	__u32 sges_per_srq_wr;
 	__u32 max_cqes;
 	__u8 dpm_enabled;
+	__u8 wids_enabled;
+	__u16 wid_count;
 };
 
 struct qedr_alloc_pd_ureq {
-- 
cgit v1.2.3


From f30994622b2bf8e4fa224237ac65304b27a9cb6a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 25 Jul 2017 11:27:17 -0700
Subject: drm/vc4: Add an ioctl for labeling GEM BOs for summary stats

This has proven immensely useful for debugging memory leaks and
overallocation (which is a rather serious concern on the platform,
given that we typically run at about 256MB of CMA out of up to 1GB
total memory, with framebuffers that are about 8MB ecah).

The state of the art without this is to dump debug logs from every GL
application, guess as to kernel allocations based on bo_stats, and try
to merge that all together into a global picture of memory allocation
state.  With this, you can add a couple of calls to the debug build of
the 3D driver and get a pretty detailed view of GPU memory usage from
/debug/dri/0/bo_stats (or when we debug print to dmesg on allocation
failure).

The Mesa side currently labels at the gallium resource level (so you
see that a 1920x20 pixmap has been created, presumably for the window
system panel), but we could extend that to be even more useful with
glObjectLabel() names being sent all the way down to the kernel.

(partial) example of sorted debugfs output with Mesa labeling all
resources:

               kernel BO cache:  16392kb BOs (3)
       tiling shadow 1920x1080:   8160kb BOs (1)
       resource 1920x1080@32/0:   8160kb BOs (1)
scanout resource 1920x1080@32/0:   8100kb BOs (1)
                        kernel:   8100kb BOs (1)

v2: Use strndup_user(), use lockdep assertion instead of just a
    comment, fix an array[-1] reference, extend comment about name
    freeing.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170725182718.31468-2-eric@anholt.net
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/uapi/drm/vc4_drm.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 6ac4c5c014cb..551628e571f9 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -40,6 +40,7 @@ extern "C" {
 #define DRM_VC4_GET_PARAM                         0x07
 #define DRM_VC4_SET_TILING                        0x08
 #define DRM_VC4_GET_TILING                        0x09
+#define DRM_VC4_LABEL_BO                          0x0a
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -51,6 +52,7 @@ extern "C" {
 #define DRM_IOCTL_VC4_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
 #define DRM_IOCTL_VC4_SET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
 #define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
+#define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
 
 struct drm_vc4_submit_rcl_surface {
 	__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -311,6 +313,15 @@ struct drm_vc4_set_tiling {
 	__u64 modifier;
 };
 
+/**
+ * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes.
+ */
+struct drm_vc4_label_bo {
+	__u32 handle;
+	__u32 len;
+	__u64 name;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From ca1136c99b66b1566781ff12ecddc635d570f932 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 12 Jul 2017 11:49:53 -0700
Subject: blktrace: export cgroup info in trace

Currently blktrace isn't cgroup aware. blktrace prints out task name of
current context, but the task of current context isn't always in the
cgroup where the BIO comes from. We can't use task name to find out IO
cgroup. For example, Writeback BIOs always comes from flusher thread but
the BIOs are for different blk cgroups. Request could be requeued and
dispatched from completely different tasks. MD/DM are another examples.

This patch tries to fix the gap. We print out cgroup fhandle info in
blktrace. Userspace can use open_by_handle_at() syscall to find the
cgroup by fhandle. Or userspace can use name_to_handle_at() syscall to
find fhandle for a cgroup and use a BPF program to filter out blktrace
for a specific cgroup.

We add a new 'blk_cgroup' trace option for blk tracer. It's default off.
Application which doesn't know the new option isn't affected.  When it's
on, we output fhandle info right after blk_io_trace with an extra bit
set in event action. So from application point of view, blktrace with
the option will output new actions.

I didn't change blk trace event yet, since I'm not sure if changing the
trace event output is an ABI issue. If not, I'll do it later.

Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/blktrace_api.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index c590ca6bfbd9..9cdaedeadb84 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -52,6 +52,7 @@ enum blktrace_act {
 	__BLK_TA_REMAP,			/* bio was remapped */
 	__BLK_TA_ABORT,			/* request aborted */
 	__BLK_TA_DRV_DATA,		/* driver-specific binary data */
+	__BLK_TA_CGROUP = 1 << 8,	/* from a cgroup*/
 };
 
 /*
@@ -61,6 +62,7 @@ enum blktrace_notify {
 	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
 	__BLK_TN_TIMESTAMP,		/* include system clock */
 	__BLK_TN_MESSAGE,		/* Character string message */
+	__BLK_TN_CGROUP = __BLK_TA_CGROUP, /* from a cgroup */
 };
 
 
@@ -107,6 +109,7 @@ struct blk_io_trace {
 	__u32 cpu;		/* on what cpu did it happen */
 	__u16 error;		/* completion error */
 	__u16 pdu_len;		/* length of data after this trace */
+	/* cgroup id will be stored here if exists */
 };
 
 /*
-- 
cgit v1.2.3


From 1a5f3da20bd966220931239fbd31e6ac6ff42251 Mon Sep 17 00:00:00 2001
From: Vidya Sagar Ravipati <vidya.chowdary@gmail.com>
Date: Thu, 27 Jul 2017 16:47:26 -0700
Subject: net: ethtool: add support for forward error correction modes

Forward Error Correction (FEC) modes i.e Base-R
and Reed-Solomon modes are introduced in 25G/40G/100G standards
for providing good BER at high speeds. Various networking devices
which support 25G/40G/100G provides ability to manage supported FEC
modes and the lack of FEC encoding control and reporting today is a
source for interoperability issues for many vendors.
FEC capability as well as specific FEC mode i.e. Base-R
or RS modes can be requested or advertised through bits D44:47 of
base link codeword.

This patch set intends to provide option under ethtool to manage
and report FEC encoding settings for networking devices as per
IEEE 802.3 bj, bm and by specs.

set-fec/show-fec option(s) are designed to provide control and
report the FEC encoding on the link.

SET FEC option:
root@tor: ethtool --set-fec  swp1 encoding [off | RS | BaseR | auto]

Encoding: Types of encoding
Off    :  Turning off any encoding
RS     :  enforcing RS-FEC encoding on supported speeds
BaseR  :  enforcing Base R encoding on supported speeds
Auto   :  IEEE defaults for the speed/medium combination

Here are a few examples of what we would expect if encoding=auto:
- if autoneg is on, we are  expecting FEC to be negotiated as on or off
  as long as protocol supports it
- if the hardware is capable of detecting the FEC encoding on it's
      receiver it will reconfigure its encoder to match
- in absence of the above, the configuration would be set to IEEE
  defaults.

>From our  understanding , this is essentially what most hardware/driver
combinations are doing today in the absence of a way for users to
control the behavior.

SHOW FEC option:
root@tor: ethtool --show-fec  swp1
FEC parameters for swp1:
Active FEC encodings: RS
Configured FEC encodings:  RS | BaseR

ETHTOOL DEVNAME output modification:

ethtool devname output:
root@tor:~# ethtool swp1
Settings for swp1:
root@hpe-7712-03:~# ethtool swp18
Settings for swp18:
    Supported ports: [ FIBRE ]
    Supported link modes:   40000baseCR4/Full
                            40000baseSR4/Full
                            40000baseLR4/Full
                            100000baseSR4/Full
                            100000baseCR4/Full
                            100000baseLR4_ER4/Full
    Supported pause frame use: No
    Supports auto-negotiation: Yes
    Supported FEC modes: [RS | BaseR | None | Not reported]
    Advertised link modes:  Not reported
    Advertised pause frame use: No
    Advertised auto-negotiation: No
    Advertised FEC modes: [RS | BaseR | None | Not reported]
<<<< One or more FEC modes
    Speed: 100000Mb/s
    Duplex: Full
    Port: FIBRE
    PHYAD: 106
    Transceiver: internal
    Auto-negotiation: off
    Link detected: yes

This patch includes following changes
a) New ETHTOOL_SFECPARAM/SFECPARAM API, handled by
  the new get_fecparam/set_fecparam callbacks, provides support
  for configuration of forward error correction modes.
b) Link mode bits for FEC modes i.e. None (No FEC mode), RS, BaseR/FC
  are defined so that users can configure these fec modes for supported
  and advertising fields as part of link autonegotiation.

Signed-off-by: Vidya Sagar Ravipati <vidya.chowdary@gmail.com>
Signed-off-by: Dustin Byford <dustin@cumulusnetworks.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 48 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7d4a594d5d58..9c041dae8e2c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1238,6 +1238,47 @@ struct ethtool_per_queue_op {
 	char	data[];
 };
 
+/**
+ * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
+ * @active_fec: FEC mode which is active on porte
+ * @fec: Bitmask of supported/configured FEC modes
+ * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ */
+struct ethtool_fecparam {
+	__u32   cmd;
+	/* bitmask of FEC modes */
+	__u32   active_fec;
+	__u32   fec;
+	__u32   reserved;
+};
+
+/**
+ * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
+ * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
+ * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
+ * @ETHTOOL_FEC_OFF: No FEC Mode
+ * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ */
+enum ethtool_fec_config_bits {
+	ETHTOOL_FEC_NONE_BIT,
+	ETHTOOL_FEC_AUTO_BIT,
+	ETHTOOL_FEC_OFF_BIT,
+	ETHTOOL_FEC_RS_BIT,
+	ETHTOOL_FEC_BASER_BIT,
+};
+
+#define ETHTOOL_FEC_NONE		(1 << ETHTOOL_FEC_NONE_BIT)
+#define ETHTOOL_FEC_AUTO		(1 << ETHTOOL_FEC_AUTO_BIT)
+#define ETHTOOL_FEC_OFF			(1 << ETHTOOL_FEC_OFF_BIT)
+#define ETHTOOL_FEC_RS			(1 << ETHTOOL_FEC_RS_BIT)
+#define ETHTOOL_FEC_BASER		(1 << ETHTOOL_FEC_BASER_BIT)
+
 /* CMDs currently supported */
 #define ETHTOOL_GSET		0x00000001 /* DEPRECATED, Get settings.
 					    * Please use ETHTOOL_GLINKSETTINGS
@@ -1330,6 +1371,8 @@ struct ethtool_per_queue_op {
 #define ETHTOOL_SLINKSETTINGS	0x0000004d /* Set ethtool_link_settings */
 #define ETHTOOL_PHY_GTUNABLE	0x0000004e /* Get PHY tunable configuration */
 #define ETHTOOL_PHY_STUNABLE	0x0000004f /* Set PHY tunable configuration */
+#define ETHTOOL_GFECPARAM	0x00000050 /* Get FEC settings */
+#define ETHTOOL_SFECPARAM	0x00000051 /* Set FEC settings */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -1387,6 +1430,9 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_2500baseT_Full_BIT	= 47,
 	ETHTOOL_LINK_MODE_5000baseT_Full_BIT	= 48,
 
+	ETHTOOL_LINK_MODE_FEC_NONE_BIT	= 49,
+	ETHTOOL_LINK_MODE_FEC_RS_BIT	= 50,
+	ETHTOOL_LINK_MODE_FEC_BASER_BIT	= 51,
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
 	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1395,7 +1441,7 @@ enum ethtool_link_mode_bit_indices {
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+	  = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
-- 
cgit v1.2.3


From 64c83d837329531252a1a0f0dfdd4fd607e1d8e9 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 30 Jul 2017 13:24:49 -0400
Subject: net netlink: Add new type NLA_BITFIELD32

Generic bitflags attribute content sent to the kernel by user.
With this netlink attr type the user can either set or unset a
flag in the kernel.

The value is a bitmap that defines the bit values being set
The selector is a bitmask that defines which value bit is to be
considered.

A check is made to ensure the rules that a kernel subsystem always
conforms to bitflags the kernel already knows about. i.e
if the user tries to set a bit flag that is not understood then
the _it will be rejected_.

In the most basic form, the user specifies the attribute policy as:
[ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },

where myvalidflags is the bit mask of the flags the kernel understands.

If the user _does not_ provide myvalidflags then the attribute will
also be rejected.

Examples:
value = 0x0, and selector = 0x1
implies we are selecting bit 1 and we want to set its value to 0.

value = 0x2, and selector = 0x2
implies we are selecting bit 2 and we want to set its value to 1.

Suggested-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f86127a46cfc..f4fc9c9e123d 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -226,5 +226,22 @@ struct nlattr {
 #define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
 #define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
 
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ *  value = 0x0, and selector = 0x1
+ *  implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ *  value = 0x2, and selector = 0x2
+ *  implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+	__u32 value;
+	__u32 selector;
+};
 
 #endif /* _UAPI__LINUX_NETLINK_H */
-- 
cgit v1.2.3


From 90825b23a887f06f6c05bdde77b200c5fe9b6217 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 30 Jul 2017 13:24:51 -0400
Subject: net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch

When you dump hundreds of thousands of actions, getting only 32 per
dump batch even when the socket buffer and memory allocations allow
is inefficient.

With this change, the user will get as many as possibly fitting
within the given constraints available to the kernel.

The top level action TLV space is extended. An attribute
TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
is set by the user indicating the user is capable of processing
these large dumps. Older user space which doesnt set this flag
doesnt get the large (than 32) batches.
The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
actions are put in a single batch. As such user space app knows how long
to iterate (independent of the type of action being dumped)
instead of hardcoded maximum of 32 thus maintaining backward compat.

Some results dumping 1.5M actions below:
first an unpatched tc which doesnt understand these features...

prompt$ time -p tc actions ls action gact | grep index | wc -l
1500000
real 1388.43
user 2.07
sys 1386.79

Now lets see a patched tc which sets the correct flags when requesting
a dump:

prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
1500000
real 178.13
user 2.02
sys 176.96

That is about 8x performance improvement for tc app which sets its
receive buffer to about 32K.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d148505010a7..bfa80a6164d9 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -683,10 +683,28 @@ struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
-- 
cgit v1.2.3


From e62e484df04964ac947c679ef4f00c54ae5395aa Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 30 Jul 2017 13:24:52 -0400
Subject: net sched actions: add time filter for action dumping

This patch adds support for filtering based on time since last used.
When we are dumping a large number of actions it is useful to
have the option of filtering based on when the action was last
used to reduce the amount of data crossing to user space.

With this patch the user space app sets the TCA_ROOT_TIME_DELTA
attribute with the value in milliseconds with "time of interest
since now".  The kernel converts this to jiffies and does the
filtering comparison matching entries that have seen activity
since then and returns them to user space.
Old kernels and old tc continue to work in legacy mode since
they dont specify this attribute.

Some example (we have 400 actions bound to 400 filters); at
installation time. Using updated when tc setting the time of
interest to 120 seconds earlier (we see 400 actions):
prompt$ hackedtc actions ls action gact since 120000| grep index | wc -l
400

go get some coffee and wait for > 120 seconds and try again:

prompt$ hackedtc actions ls action gact since 120000 | grep index | wc -l
0

Lets see a filter bound to one of these actions:
....
filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
  match 7f000002/ffffffff at 12 (success 1 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
    Action statistics:
    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0
....

that coffee took long, no? It was good.

Now lets ping -c 1 127.0.0.2, then run the actions again:
prompt$ hackedtc actions ls action gact since 120 | grep index | wc -l
1

More details please:
prompt$ hackedtc -s actions ls action gact since 120000

    action order 0: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

And the filter?

filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
  match 7f000002/ffffffff at 12 (success 2 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index bfa80a6164d9..dab7dad9e01a 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -691,6 +691,7 @@ enum {
 #define TCAA_MAX TCA_ROOT_TAB
 	TCA_ROOT_FLAGS,
 	TCA_ROOT_COUNT,
+	TCA_ROOT_TIME_DELTA, /* in msecs */
 	__TCA_ROOT_MAX,
 #define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
 };
-- 
cgit v1.2.3


From e46abbcc05aa8a16b0e7f5c94e86d11af9aa2770 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:41 +0200
Subject: netfilter: nf_tables: Allow table names of up to 255 chars

Allocate all table names dynamically to allow for arbitrary lengths but
introduce NFT_NAME_MAXLEN as an upper sanity boundary. It's value was
chosen to allow using a domain name as per RFC 1035.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6f0a950e21c3..0b94e572ef16 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_NF_TABLES_H
 #define _LINUX_NF_TABLES_H
 
-#define NFT_TABLE_MAXNAMELEN	32
+#define NFT_NAME_MAXLEN		256
+#define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN	32
 #define NFT_SET_MAXNAMELEN	32
 #define NFT_OBJ_MAXNAMELEN	32
-- 
cgit v1.2.3


From b7263e071aba736cea9e71cdf2e76dfa7aebd039 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:42 +0200
Subject: netfilter: nf_tables: Allow chain name of up to 255 chars

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 0b94e572ef16..d9c03a8608ee 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -3,7 +3,7 @@
 
 #define NFT_NAME_MAXLEN		256
 #define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
-#define NFT_CHAIN_MAXNAMELEN	32
+#define NFT_CHAIN_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_SET_MAXNAMELEN	32
 #define NFT_OBJ_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
-- 
cgit v1.2.3


From 387454901bd62022ac1b04e15bd8d4fcc60bbed4 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:43 +0200
Subject: netfilter: nf_tables: Allow set names of up to 255 chars

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index d9c03a8608ee..b5e73e80b7b6 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -4,7 +4,7 @@
 #define NFT_NAME_MAXLEN		256
 #define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN	NFT_NAME_MAXLEN
-#define NFT_SET_MAXNAMELEN	32
+#define NFT_SET_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_OBJ_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
 
-- 
cgit v1.2.3


From 615095752100748e221028fc96163c2b78185ae4 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:44 +0200
Subject: netfilter: nf_tables: Allow object names of up to 255 chars

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b5e73e80b7b6..be25cf69295b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -5,7 +5,7 @@
 #define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_SET_MAXNAMELEN	NFT_NAME_MAXLEN
-#define NFT_OBJ_MAXNAMELEN	32
+#define NFT_OBJ_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_USERDATA_MAXLEN	256
 
 /**
-- 
cgit v1.2.3


From 3282e65558b3651e230ee985c174c35cb2fedaf1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 30 Jul 2017 03:57:23 +0200
Subject: tcp: remove unused mib counters

was used by tcp prequeue and header prediction.
TCPFORWARDRETRANS use was removed in january.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index d85693295798..b3f346fb9fe3 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -184,14 +184,7 @@ enum
 	LINUX_MIB_DELAYEDACKLOST,		/* DelayedACKLost */
 	LINUX_MIB_LISTENOVERFLOWS,		/* ListenOverflows */
 	LINUX_MIB_LISTENDROPS,			/* ListenDrops */
-	LINUX_MIB_TCPPREQUEUED,			/* TCPPrequeued */
-	LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG,	/* TCPDirectCopyFromBacklog */
-	LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE,	/* TCPDirectCopyFromPrequeue */
-	LINUX_MIB_TCPPREQUEUEDROPPED,		/* TCPPrequeueDropped */
-	LINUX_MIB_TCPHPHITS,			/* TCPHPHits */
-	LINUX_MIB_TCPHPHITSTOUSER,		/* TCPHPHitsToUser */
 	LINUX_MIB_TCPPUREACKS,			/* TCPPureAcks */
-	LINUX_MIB_TCPHPACKS,			/* TCPHPAcks */
 	LINUX_MIB_TCPRENORECOVERY,		/* TCPRenoRecovery */
 	LINUX_MIB_TCPSACKRECOVERY,		/* TCPSackRecovery */
 	LINUX_MIB_TCPSACKRENEGING,		/* TCPSACKReneging */
@@ -208,14 +201,12 @@ enum
 	LINUX_MIB_TCPSACKFAILURES,		/* TCPSackFailures */
 	LINUX_MIB_TCPLOSSFAILURES,		/* TCPLossFailures */
 	LINUX_MIB_TCPFASTRETRANS,		/* TCPFastRetrans */
-	LINUX_MIB_TCPFORWARDRETRANS,		/* TCPForwardRetrans */
 	LINUX_MIB_TCPSLOWSTARTRETRANS,		/* TCPSlowStartRetrans */
 	LINUX_MIB_TCPTIMEOUTS,			/* TCPTimeouts */
 	LINUX_MIB_TCPLOSSPROBES,		/* TCPLossProbes */
 	LINUX_MIB_TCPLOSSPROBERECOVERY,		/* TCPLossProbeRecovery */
 	LINUX_MIB_TCPRENORECOVERYFAIL,		/* TCPRenoRecoveryFail */
 	LINUX_MIB_TCPSACKRECOVERYFAIL,		/* TCPSackRecoveryFail */
-	LINUX_MIB_TCPSCHEDULERFAILED,		/* TCPSchedulerFailed */
 	LINUX_MIB_TCPRCVCOLLAPSED,		/* TCPRcvCollapsed */
 	LINUX_MIB_TCPDSACKOLDSENT,		/* TCPDSACKOldSent */
 	LINUX_MIB_TCPDSACKOFOSENT,		/* TCPDSACKOfoSent */
-- 
cgit v1.2.3


From bb7c19f96012720b895111300b9d9f3f858c3a69 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Fri, 28 Jul 2017 10:28:21 -0700
Subject: tcp: add related fields into SCM_TIMESTAMPING_OPT_STATS

Add the following stats into SCM_TIMESTAMPING_OPT_STATS control msg:
    TCP_NLA_PACING_RATE
    TCP_NLA_DELIVERY_RATE
    TCP_NLA_SND_CWND
    TCP_NLA_REORDERING
    TCP_NLA_MIN_RTT
    TCP_NLA_RECUR_RETRANS
    TCP_NLA_DELIVERY_RATE_APP_LMT

Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index a5507c977497..030e594bab45 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -231,6 +231,14 @@ enum {
 	TCP_NLA_SNDBUF_LIMITED,	/* Time (usec) limited by send buffer */
 	TCP_NLA_DATA_SEGS_OUT,	/* Data pkts sent including retransmission */
 	TCP_NLA_TOTAL_RETRANS,	/* Data pkts retransmitted */
+	TCP_NLA_PACING_RATE,    /* Pacing rate in bytes per second */
+	TCP_NLA_DELIVERY_RATE,  /* Delivery rate in bytes per second */
+	TCP_NLA_SND_CWND,       /* Sending congestion window */
+	TCP_NLA_REORDERING,     /* Reordering metric */
+	TCP_NLA_MIN_RTT,        /* minimum RTT */
+	TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
+	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+
 };
 
 /* for TCP_MD5SIG socket option */
-- 
cgit v1.2.3


From e6fc3b68558e4c6d8d160b5daf2511b99afa8814 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Sun, 23 Jul 2017 20:46:38 -0700
Subject: drm: Plumb modifiers through plane init

This is the plumbing for supporting fb modifiers on planes. Modifiers
have already been introduced to some extent, but this series will extend
this to allow querying modifiers per plane. Based on this, the client to
enable optimal modifications for framebuffers.

This patch simply allows the DRM drivers to initialize their list of
supported modifiers upon initializing the plane.

v2: A minor addition from Daniel

v3:
* Updated commit message
* s/INVALID/DRM_FORMAT_MOD_INVALID (Liviu)
* Remove some excess newlines (Liviu)
* Update comment for > 64 modifiers (Liviu)

v4: Minor comment adjustments (Liviu)

v5: Some new platforms added due to rebase

v6: Add some missed plane inits (or maybe they're new - who knows at
this point) (Daniel)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Daniel Stone <daniels@collabora.com> (v2)
Reviewed-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Daniel Stone <daniels@collabora.com>
---
 include/uapi/drm/drm_fourcc.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 7586c46f68bf..76c9101a7fc6 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -185,6 +185,8 @@ extern "C" {
 #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
 /* add more to the end as needed */
 
+#define DRM_FORMAT_RESERVED	      ((1ULL << 56) - 1)
+
 #define fourcc_mod_code(vendor, val) \
 	((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL))
 
@@ -196,6 +198,15 @@ extern "C" {
  * authoritative source for all of these.
  */
 
+/*
+ * Invalid Modifier
+ *
+ * This modifier can be used as a sentinel to terminate the format modifiers
+ * list, or to initialize a variable with an invalid modifier. It might also be
+ * used to report an error back to userspace for certain APIs.
+ */
+#define DRM_FORMAT_MOD_INVALID	fourcc_mod_code(NONE, DRM_FORMAT_RESERVED)
+
 /*
  * Linear Layout
  *
-- 
cgit v1.2.3


From db1689aa61bd1efb5ce9b896e7aa860a85b7f1b6 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Sun, 23 Jul 2017 20:46:39 -0700
Subject: drm: Create a format/modifier blob

Updated blob layout (Rob, Daniel, Kristian, xerpi)

v2:
* Removed __packed, and alignment (.+)
* Fix indent in drm_format_modifier fields (Liviu)
* Remove duplicated modifier > 64 check (Liviu)
* Change comment about modifier (Liviu)
* Remove arguments to blob creation, use plane instead (Liviu)
* Fix data types (Ben)
* Make the blob part of uapi (Daniel)

v3:
Remove unused ret field.
Change i, and j to unsigned int (Emil)

v4:
Use plane->modifier_count instead of recounting (Daniel)

v5:
Rename modifiers to modifiers_property (Ville)
Use sizeof(__u32) instead to reflect UAPI nature (Ville)
Make BUILD_BUG_ON for blob header size

Cc: Rob Clark <robdclark@gmail.com>
Cc: Kristian H. Kristensen <hoegsberg@gmail.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Daniel Stone <daniels@collabora.com> (v2)
Reviewed-by: Liviu Dudau <liviu@dudau.co.uk> (v2)
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com> (v3)
Signed-off-by: Daniel Stone <daniels@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170724034641.13369-2-ben@bwidawsk.net
---
 include/uapi/drm/drm_mode.h | 50 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 403339f98a92..a2bb7161f020 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -712,6 +712,56 @@ struct drm_mode_atomic {
 	__u64 user_data;
 };
 
+struct drm_format_modifier_blob {
+#define FORMAT_BLOB_CURRENT 1
+	/* Version of this blob format */
+	u32 version;
+
+	/* Flags */
+	u32 flags;
+
+	/* Number of fourcc formats supported */
+	u32 count_formats;
+
+	/* Where in this blob the formats exist (in bytes) */
+	u32 formats_offset;
+
+	/* Number of drm_format_modifiers */
+	u32 count_modifiers;
+
+	/* Where in this blob the modifiers exist (in bytes) */
+	u32 modifiers_offset;
+
+	/* u32 formats[] */
+	/* struct drm_format_modifier modifiers[] */
+};
+
+struct drm_format_modifier {
+	/* Bitmask of formats in get_plane format list this info applies to. The
+	 * offset allows a sliding window of which 64 formats (bits).
+	 *
+	 * Some examples:
+	 * In today's world with < 65 formats, and formats 0, and 2 are
+	 * supported
+	 * 0x0000000000000005
+	 *		  ^-offset = 0, formats = 5
+	 *
+	 * If the number formats grew to 128, and formats 98-102 are
+	 * supported with the modifier:
+	 *
+	 * 0x0000003c00000000 0000000000000000
+	 *		  ^
+	 *		  |__offset = 64, formats = 0x3c00000000
+	 *
+	 */
+	__u64 formats;
+	__u32 offset;
+	__u32 pad;
+
+	/* The modifier that applies to the >get_plane format list bitmask. */
+	__u64 modifier;
+};
+
 /**
  * Create a new 'blob' data property, copying length bytes from data pointer,
  * and returning new blob ID.
-- 
cgit v1.2.3


From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:35 -0600
Subject: drm/msm: Remove __user from __u64 data types

__user should be used to identify user pointers and not __u64
variables containing pointers.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 26c54f6d595d..ad4eb2863e70 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd {
 	__u32 size;           /* in, cmdstream size */
 	__u32 pad;
 	__u32 nr_relocs;      /* in, number of submit_reloc's */
-	__u64 __user relocs;  /* in, ptr to array of submit_reloc's */
+	__u64 relocs;         /* in, ptr to array of submit_reloc's */
 };
 
 /* Each buffer referenced elsewhere in the cmdstream submit (ie. the
@@ -215,8 +215,8 @@ struct drm_msm_gem_submit {
 	__u32 fence;          /* out */
 	__u32 nr_bos;         /* in, number of submit_bo's */
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
-	__u64 __user bos;     /* in, ptr to array of submit_bo's */
-	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
+	__u64 bos;            /* in, ptr to array of submit_bo's */
+	__u64 cmds;           /* in, ptr to array of submit_cmd's */
 	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
 };
 
-- 
cgit v1.2.3


From f89823c212246d0671cc51e69894a3df1a743aee Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Thu, 3 Aug 2017 18:05:50 +0100
Subject: drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface

The motivation behind this new interface is expose at runtime the
creation of new OA configs which can be used as part of the i915 perf
open interface. This will enable the kernel to learn new configs which
may be experimental, or otherwise not part of the core set currently
available through the i915 perf interface.

v2: Drop DRM_ERROR for userspace errors (Matthew)
    Add padding to userspace structure (Matthew)
    s/guid/uuid/ (Matthew)

v3: Use u32 instead of int to iterate through registers (Matthew)

v4: Lock access to dynamic config list (Lionel)

v5: by Matthew:
    Fix uninitialized error values
    Fix incorrect unwiding when opening perf stream
    Use kmalloc_array() to store register
    Use uuid_is_valid() to valid config uuids
    Declare ioctls as write only
    Check padding members are set to 0
    by Lionel:
    Return ENOENT rather than EINVAL when trying to remove non
    existing config

v6: by Chris:
    Use ref counts for OA configs
    Store UUID in drm_i915_perf_oa_config rather then using pointer
    Shuffle fields of drm_i915_perf_oa_config to avoid padding

v7: by Chris
    Rename uapi pointers fields to end with '_ptr'

v8: by Andrzej, Marek, Sebastian
    Update register whitelisting
    by Lionel
    Add more register names for documentation
    Allow configuration programming in non-paranoid mode
    Add support for value filter for a couple of registers already
    programmed in other part of the kernel

v9: Documentation fix (Lionel)
    Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej)

v10: Perform read access_ok() on register pointers (Lionel)

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Andrzej Datczuk <andrzej.datczuk@intel.com>
Reviewed-by: Andrzej Datczuk <andrzej.datczuk@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com
---
 include/uapi/drm/i915_drm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ccbd6a2bbe0..ce3833fa1e06 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_CONTEXT_GETPARAM	0x34
 #define DRM_I915_GEM_CONTEXT_SETPARAM	0x35
 #define DRM_I915_PERF_OPEN		0x36
+#define DRM_I915_PERF_ADD_CONFIG	0x37
+#define DRM_I915_PERF_REMOVE_CONFIG	0x38
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1467,6 +1471,22 @@ enum drm_i915_perf_record_type {
 	DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+	/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+	char uuid[36];
+
+	__u32 n_mux_regs;
+	__u32 n_boolean_regs;
+	__u32 n_flex_regs;
+
+	__u64 __user mux_regs_ptr;
+	__u64 __user boolean_regs_ptr;
+	__u64 __user flex_regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 61e4d01e16acddadb9723143637a20417fa67ac9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:20 +0200
Subject: ipv6: fib: Add offload indication to routes

Allow user space applications to see which routes are offloaded and
which aren't by setting the RTNH_F_OFFLOAD flag when dumping them.

To be consistent with IPv4, offload indication is provided on a
per-nexthop basis.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ipv6_route.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index d496c02e14bc..33e2a5732bd1 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,6 +35,7 @@
 #define RTF_PREF(pref)	((pref) << 27)
 #define RTF_PREF_MASK	0x18000000
 
+#define RTF_OFFLOAD	0x20000000	/* offloaded route		*/
 #define RTF_PCPU	0x40000000	/* read-only: can not be set by user */
 #define RTF_LOCAL	0x80000000
 
-- 
cgit v1.2.3


From 52267790ef52d7513879238ca9fac22c1733e0e3 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 3 Aug 2017 16:29:39 -0400
Subject: sock: add MSG_ZEROCOPY

The kernel supports zerocopy sendmsg in virtio and tap. Expand the
infrastructure to support other socket types. Introduce a completion
notification channel over the socket error queue. Notifications are
returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid
blocking the send/recv path on receiving notifications.

Add reference counting, to support the skb split, merge, resize and
clone operations possible with SOCK_STREAM and other socket types.

The patch does not yet modify any datapaths.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/errqueue.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 07bdce1f444a..78fdf52d6b2f 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -18,10 +18,13 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP	2
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
+#define SO_EE_ORIGIN_ZEROCOPY	5
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
+#define SO_EE_CODE_ZEROCOPY_COPIED	1
+
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
  *
-- 
cgit v1.2.3


From 76851d1212c11365362525e1e2c0a18c97478e6b Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 3 Aug 2017 16:29:40 -0400
Subject: sock: add SOCK_ZEROCOPY sockopt

The send call ignores unknown flags. Legacy applications may already
unwittingly pass MSG_ZEROCOPY. Continue to ignore this flag unless a
socket opts in to zerocopy.

Introduce socket option SO_ZEROCOPY to enable MSG_ZEROCOPY processing.
Processes can also query this socket option to detect kernel support
for the feature. Older kernels will return ENOPROTOOPT.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/asm-generic/socket.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 9861be8da65e..e47c9e436221 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -104,4 +104,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* __ASM_GENERIC_SOCKET_H */
-- 
cgit v1.2.3


From 059cf566e123ca7eb7434285c6455d7afafb4e02 Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Thu, 16 Feb 2017 09:07:02 +0100
Subject: tee: indicate privileged dev in gen_caps

Mirrors the TEE_DESC_PRIVILEGED bit of struct tee_desc:flags into struct
tee_ioctl_version_data:gen_caps as TEE_GEN_CAP_PRIVILEGED in
tee_ioctl_version()

Reviewed-by: Jerome Forissier <jerome.forissier@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/uapi/linux/tee.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
index 370d8845ab21..688782e90140 100644
--- a/include/uapi/linux/tee.h
+++ b/include/uapi/linux/tee.h
@@ -49,6 +49,7 @@
 #define TEE_MAX_ARG_SIZE	1024
 
 #define TEE_GEN_CAP_GP		(1 << 0)/* GlobalPlatform compliant TEE */
+#define TEE_GEN_CAP_PRIVILEGED	(1 << 1)/* Privileged device (for supplicant) */
 
 /*
  * TEE Implementation ID
-- 
cgit v1.2.3


From 56ce097c1caede1f9c191a7c9699b950e7c36ad9 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 4 Aug 2017 08:24:05 -0700
Subject: net: comment fixes against BPF devmap helper calls

Update BPF comments to accurately reflect XDP usage.

Fixes: 97f91a7cf04ff ("bpf: add bpf_redirect_map helper routine")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1106a8c4cd36..1d06be1569b1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -345,14 +345,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
- * int bpf_redirect_map(key, map, flags)
+ *     @flags:
+ *	  cls_bpf:
+ *          bit 0 - if set, redirect to ingress instead of egress
+ *          other bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ * int bpf_redirect_map(map, key, flags)
  *     redirect to endpoint in map
+ *     @map: pointer to dev map
  *     @key: index in map to lookup
- *     @map: fd of map to do lookup in
  *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORT on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
-- 
cgit v1.2.3


From 472b46c352c9ff0b6fa57dbf85d77c51901a3368 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 6 Aug 2017 18:44:27 +0200
Subject: uapi linux/kfd_ioctl.h: only use __u32 and __u64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include <drm/drm.h> instead of <linux/types.h> which on Linux includes
<linux/types.h> and on non-Linux platforms defines __u32 etc types.

Fixes user space compilation errors like:

linux/kfd_ioctl.h:33:2: error: unknown type name ‘uint32_t’
  uint32_t major_version; /* from KFD */
  ^~~~~~~~

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 172 ++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 86 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 7b4567bacfc2..26283fefdf5f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -23,15 +23,15 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
-#include <linux/types.h>
+#include <drm/drm.h>
 #include <linux/ioctl.h>
 
 #define KFD_IOCTL_MAJOR_VERSION 1
 #define KFD_IOCTL_MINOR_VERSION 1
 
 struct kfd_ioctl_get_version_args {
-	uint32_t major_version;	/* from KFD */
-	uint32_t minor_version;	/* from KFD */
+	__u32 major_version;	/* from KFD */
+	__u32 minor_version;	/* from KFD */
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
@@ -43,36 +43,36 @@ struct kfd_ioctl_get_version_args {
 #define KFD_MAX_QUEUE_PRIORITY		15
 
 struct kfd_ioctl_create_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
-	uint64_t write_pointer_address;	/* from KFD */
-	uint64_t read_pointer_address;	/* from KFD */
-	uint64_t doorbell_offset;	/* from KFD */
-
-	uint32_t ring_size;		/* to KFD */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t queue_type;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
-	uint32_t queue_id;		/* from KFD */
-
-	uint64_t eop_buffer_address;	/* to KFD */
-	uint64_t eop_buffer_size;	/* to KFD */
-	uint64_t ctx_save_restore_address; /* to KFD */
-	uint64_t ctx_save_restore_size;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
+	__u64 write_pointer_address;	/* from KFD */
+	__u64 read_pointer_address;	/* from KFD */
+	__u64 doorbell_offset;	/* from KFD */
+
+	__u32 ring_size;		/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 queue_type;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
+	__u32 queue_id;		/* from KFD */
+
+	__u64 eop_buffer_address;	/* to KFD */
+	__u64 eop_buffer_size;	/* to KFD */
+	__u64 ctx_save_restore_address; /* to KFD */
+	__u64 ctx_save_restore_size;	/* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
-	uint32_t queue_id;		/* to KFD */
-	uint32_t pad;
+	__u32 queue_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_update_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
 
-	uint32_t queue_id;		/* to KFD */
-	uint32_t ring_size;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
+	__u32 queue_id;		/* to KFD */
+	__u32 ring_size;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -80,13 +80,13 @@ struct kfd_ioctl_update_queue_args {
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
 
 struct kfd_ioctl_set_memory_policy_args {
-	uint64_t alternate_aperture_base;	/* to KFD */
-	uint64_t alternate_aperture_size;	/* to KFD */
+	__u64 alternate_aperture_base;	/* to KFD */
+	__u64 alternate_aperture_size;	/* to KFD */
 
-	uint32_t gpu_id;			/* to KFD */
-	uint32_t default_policy;		/* to KFD */
-	uint32_t alternate_policy;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;			/* to KFD */
+	__u32 default_policy;		/* to KFD */
+	__u32 alternate_policy;		/* to KFD */
+	__u32 pad;
 };
 
 /*
@@ -97,26 +97,26 @@ struct kfd_ioctl_set_memory_policy_args {
  */
 
 struct kfd_ioctl_get_clock_counters_args {
-	uint64_t gpu_clock_counter;	/* from KFD */
-	uint64_t cpu_clock_counter;	/* from KFD */
-	uint64_t system_clock_counter;	/* from KFD */
-	uint64_t system_clock_freq;	/* from KFD */
+	__u64 gpu_clock_counter;	/* from KFD */
+	__u64 cpu_clock_counter;	/* from KFD */
+	__u64 system_clock_counter;	/* from KFD */
+	__u64 system_clock_freq;	/* from KFD */
 
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 #define NUM_OF_SUPPORTED_GPUS 7
 
 struct kfd_process_device_apertures {
-	uint64_t lds_base;		/* from KFD */
-	uint64_t lds_limit;		/* from KFD */
-	uint64_t scratch_base;		/* from KFD */
-	uint64_t scratch_limit;		/* from KFD */
-	uint64_t gpuvm_base;		/* from KFD */
-	uint64_t gpuvm_limit;		/* from KFD */
-	uint32_t gpu_id;		/* from KFD */
-	uint32_t pad;
+	__u64 lds_base;		/* from KFD */
+	__u64 lds_limit;		/* from KFD */
+	__u64 scratch_base;		/* from KFD */
+	__u64 scratch_limit;		/* from KFD */
+	__u64 gpuvm_base;		/* from KFD */
+	__u64 gpuvm_limit;		/* from KFD */
+	__u32 gpu_id;		/* from KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_get_process_apertures_args {
@@ -124,8 +124,8 @@ struct kfd_ioctl_get_process_apertures_args {
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
 
 	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
-	uint32_t num_of_nodes;
-	uint32_t pad;
+	__u32 num_of_nodes;
+	__u32 pad;
 };
 
 #define MAX_ALLOWED_NUM_POINTS    100
@@ -133,25 +133,25 @@ struct kfd_ioctl_get_process_apertures_args {
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
 
 struct kfd_ioctl_dbg_register_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_unregister_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_address_watch_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 struct kfd_ioctl_dbg_wave_control_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 /* Matching HSA_EVENTTYPE */
@@ -172,44 +172,44 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_SIGNAL_EVENT_LIMIT			256
 
 struct kfd_ioctl_create_event_args {
-	uint64_t event_page_offset;	/* from KFD */
-	uint32_t event_trigger_data;	/* from KFD - signal events only */
-	uint32_t event_type;		/* to KFD */
-	uint32_t auto_reset;		/* to KFD */
-	uint32_t node_id;		/* to KFD - only valid for certain
+	__u64 event_page_offset;	/* from KFD */
+	__u32 event_trigger_data;	/* from KFD - signal events only */
+	__u32 event_type;		/* to KFD */
+	__u32 auto_reset;		/* to KFD */
+	__u32 node_id;		/* to KFD - only valid for certain
 							event types */
-	uint32_t event_id;		/* from KFD */
-	uint32_t event_slot_index;	/* from KFD */
+	__u32 event_id;		/* from KFD */
+	__u32 event_slot_index;	/* from KFD */
 };
 
 struct kfd_ioctl_destroy_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_set_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_reset_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_memory_exception_failure {
-	uint32_t NotPresent;	/* Page not present or supervisor privilege */
-	uint32_t ReadOnly;	/* Write access to a read-only page */
-	uint32_t NoExecute;	/* Execute access to a page marked NX */
-	uint32_t pad;
+	__u32 NotPresent;	/* Page not present or supervisor privilege */
+	__u32 ReadOnly;	/* Write access to a read-only page */
+	__u32 NoExecute;	/* Execute access to a page marked NX */
+	__u32 pad;
 };
 
 /* memory exception data*/
 struct kfd_hsa_memory_exception_data {
 	struct kfd_memory_exception_failure failure;
-	uint64_t va;
-	uint32_t gpu_id;
-	uint32_t pad;
+	__u64 va;
+	__u32 gpu_id;
+	__u32 pad;
 };
 
 /* Event data*/
@@ -217,19 +217,19 @@ struct kfd_event_data {
 	union {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
 	};				/* From KFD */
-	uint64_t kfd_event_data_ext;	/* pointer to an extension structure
+	__u64 kfd_event_data_ext;	/* pointer to an extension structure
 					   for future exception types */
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_wait_events_args {
-	uint64_t events_ptr;		/* pointed to struct
+	__u64 events_ptr;		/* pointed to struct
 					   kfd_event_data array, to KFD */
-	uint32_t num_events;		/* to KFD */
-	uint32_t wait_for_all;		/* to KFD */
-	uint32_t timeout;		/* to KFD */
-	uint32_t wait_result;		/* from KFD */
+	__u32 num_events;		/* to KFD */
+	__u32 wait_for_all;		/* to KFD */
+	__u32 timeout;		/* to KFD */
+	__u32 wait_result;		/* from KFD */
 };
 
 struct kfd_ioctl_set_scratch_backing_va_args {
-- 
cgit v1.2.3


From adb8a5a5eb9f16997f11ecacf25a647134011dd7 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 6 Aug 2017 18:44:23 +0200
Subject: uapi drm/armada_drm.h: use __u32 and __u64 instead of uint32_t and
 uint64_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These are defined in linux/types.h or drm/drm.h. Fixes
user space compilation errors like:

drm/armada_drm.h:26:2: error: unknown type name ‘uint32_t’
  uint32_t handle;
  ^~~~~~~~

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Cc: Emil Velikov <emil.l.velikov@gmail.com>
Cc: Gabriel Laskar <gabriel@lse.epita.fr>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20170806164428.2273-33-mikko.rapeli@iki.fi
---
 include/uapi/drm/armada_drm.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/armada_drm.h b/include/uapi/drm/armada_drm.h
index 72e326f9c7de..0cb932416cfe 100644
--- a/include/uapi/drm/armada_drm.h
+++ b/include/uapi/drm/armada_drm.h
@@ -23,27 +23,27 @@ extern "C" {
 	DRM_##dir(DRM_COMMAND_BASE + DRM_ARMADA_##name, struct drm_armada_##str)
 
 struct drm_armada_gem_create {
-	uint32_t handle;
-	uint32_t size;
+	__u32 handle;
+	__u32 size;
 };
 #define DRM_IOCTL_ARMADA_GEM_CREATE \
 	ARMADA_IOCTL(IOWR, GEM_CREATE, gem_create)
 
 struct drm_armada_gem_mmap {
-	uint32_t handle;
-	uint32_t pad;
-	uint64_t offset;
-	uint64_t size;
-	uint64_t addr;
+	__u32 handle;
+	__u32 pad;
+	__u64 offset;
+	__u64 size;
+	__u64 addr;
 };
 #define DRM_IOCTL_ARMADA_GEM_MMAP \
 	ARMADA_IOCTL(IOWR, GEM_MMAP, gem_mmap)
 
 struct drm_armada_gem_pwrite {
-	uint64_t ptr;
-	uint32_t handle;
-	uint32_t offset;
-	uint32_t size;
+	__u64 ptr;
+	__u32 handle;
+	__u32 offset;
+	__u32 size;
 };
 #define DRM_IOCTL_ARMADA_GEM_PWRITE \
 	ARMADA_IOCTL(IOW, GEM_PWRITE, gem_pwrite)
-- 
cgit v1.2.3


From f02a60924c221985fb8b634734d6610706fa779a Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 6 Aug 2017 18:44:07 +0200
Subject: uapi linux/dlm_netlink.h: include linux/dlmconstants.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation error:

error: ‘DLM_RESNAME_MAXLEN’ undeclared here (not in a function)
  char resource_name[DLM_RESNAME_MAXLEN];

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 include/uapi/linux/dlm_netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/dlm_netlink.h b/include/uapi/linux/dlm_netlink.h
index 647c8ef27227..ef1e2e08769a 100644
--- a/include/uapi/linux/dlm_netlink.h
+++ b/include/uapi/linux/dlm_netlink.h
@@ -10,6 +10,7 @@
 #define _DLM_NETLINK_H
 
 #include <linux/types.h>
+#include <linux/dlmconstants.h>
 
 enum {
 	DLM_STATUS_WAITING = 1,
-- 
cgit v1.2.3


From d1df6fd8a1d22d37cffa0075ab8ad423ce656777 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Sat, 5 Aug 2017 12:38:26 +0200
Subject: ipv6: sr: define core operations for seg6local lightweight tunnel

This patch implements a new type of lightweight tunnel named seg6local.
A seg6local lwt is defined by a type of action and a set of parameters.
The action represents the operation to perform on the packets matching the
lwt's route, and is not necessarily an encapsulation. The set of parameters
are arguments for the processing function.

Each action is defined in a struct seg6_action_desc within
seg6_action_table[]. This structure contains the action, mandatory
attributes, the processing function, and a static headroom size required by
the action. The mandatory attributes are encoded as a bitmask field. The
static headroom is set to a non-zero value when the processing function
always add a constant number of bytes to the skb (e.g. the header size for
encapsulations).

To facilitate rtnetlink-related operations such as parsing, fill_encap,
and cmp_encap, each type of action parameter is associated to three
function pointers, in seg6_action_params[].

All actions defined in seg6_local.h are detailed in [1].

[1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/lwtunnel.h   |  1 +
 include/uapi/linux/seg6_local.h | 68 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 include/uapi/linux/seg6_local.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 92724cba1eba..7fdd19ca7511 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -11,6 +11,7 @@ enum lwtunnel_encap_types {
 	LWTUNNEL_ENCAP_IP6,
 	LWTUNNEL_ENCAP_SEG6,
 	LWTUNNEL_ENCAP_BPF,
+	LWTUNNEL_ENCAP_SEG6_LOCAL,
 	__LWTUNNEL_ENCAP_MAX,
 };
 
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
new file mode 100644
index 000000000000..ef2d8c3e76c1
--- /dev/null
+++ b/include/uapi/linux/seg6_local.h
@@ -0,0 +1,68 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include <linux/seg6.h>
+
+enum {
+	SEG6_LOCAL_UNSPEC,
+	SEG6_LOCAL_ACTION,
+	SEG6_LOCAL_SRH,
+	SEG6_LOCAL_TABLE,
+	SEG6_LOCAL_NH4,
+	SEG6_LOCAL_NH6,
+	SEG6_LOCAL_IIF,
+	SEG6_LOCAL_OIF,
+	__SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+	SEG6_LOCAL_ACTION_UNSPEC	= 0,
+	/* node segment */
+	SEG6_LOCAL_ACTION_END		= 1,
+	/* adjacency segment (IPv6 cross-connect) */
+	SEG6_LOCAL_ACTION_END_X		= 2,
+	/* lookup of next seg NH in table */
+	SEG6_LOCAL_ACTION_END_T		= 3,
+	/* decap and L2 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX2	= 4,
+	/* decap and IPv6 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX6	= 5,
+	/* decap and IPv4 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX4	= 6,
+	/* decap and lookup of DA in v6 table */
+	SEG6_LOCAL_ACTION_END_DT6	= 7,
+	/* decap and lookup of DA in v4 table */
+	SEG6_LOCAL_ACTION_END_DT4	= 8,
+	/* binding segment with insertion */
+	SEG6_LOCAL_ACTION_END_B6	= 9,
+	/* binding segment with encapsulation */
+	SEG6_LOCAL_ACTION_END_B6_ENCAP	= 10,
+	/* binding segment with MPLS encap */
+	SEG6_LOCAL_ACTION_END_BM	= 11,
+	/* lookup last seg in table */
+	SEG6_LOCAL_ACTION_END_S		= 12,
+	/* forward to SR-unaware VNF with static proxy */
+	SEG6_LOCAL_ACTION_END_AS	= 13,
+	/* forward to SR-unaware VNF with masquerading */
+	SEG6_LOCAL_ACTION_END_AM	= 14,
+
+	__SEG6_LOCAL_ACTION_MAX,
+};
+
+#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+
+#endif
-- 
cgit v1.2.3


From 3be8eddd9d58a925b461b582fa5aa422a9c145ee Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 25 Jul 2017 09:27:33 -0700
Subject: drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk
 order.

This is useful to allow GL to provide defined results for overlapping
glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited
window movement without first blitting to a temporary.  x11perf
-copywinwin100 goes from 1850/sec to 4850/sec.

v2: Default to the same behavior as before when the flags aren't
    passed. (suggested by Boris)

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170725162733.28007-2-eric@anholt.net
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/uapi/drm/vc4_drm.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 551628e571f9..afae87004963 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -155,6 +155,16 @@ struct drm_vc4_submit_cl {
 	__u32 pad:24;
 
 #define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
+/* By default, the kernel gets to choose the order that the tiles are
+ * rendered in.  If this is set, then the tiles will be rendered in a
+ * raster order, with the right-to-left vs left-to-right and
+ * top-to-bottom vs bottom-to-top dictated by
+ * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*.  This allows overlapping
+ * blits to be implemented using the 3D engine.
+ */
+#define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
 	__u32 flags;
 
 	/* Returned value of the seqno of this render job (for the
@@ -294,6 +304,7 @@ struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_BRANCHES		3
 #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
+#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
 
 struct drm_vc4_get_param {
 	__u32 param;
-- 
cgit v1.2.3


From 92b31a9af73b3a3fc801899335d6c47966351830 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 10 Aug 2017 01:39:55 +0200
Subject: bpf: add BPF_J{LT,LE,SLT,SLE} instructions

Currently, eBPF only understands BPF_JGT (>), BPF_JGE (>=),
BPF_JSGT (s>), BPF_JSGE (s>=) instructions, this means that
particularly *JLT/*JLE counterparts involving immediates need
to be rewritten from e.g. X < [IMM] by swapping arguments into
[IMM] > X, meaning the immediate first is required to be loaded
into a register Y := [IMM], such that then we can compare with
Y > X. Note that the destination operand is always required to
be a register.

This has the downside of having unnecessarily increased register
pressure, meaning complex program would need to spill other
registers temporarily to stack in order to obtain an unused
register for the [IMM]. Loading to registers will thus also
affect state pruning since we need to account for that register
use and potentially those registers that had to be spilled/filled
again. As a consequence slightly more stack space might have
been used due to spilling, and BPF programs are a bit longer
due to extra code involving the register load and potentially
required spill/fills.

Thus, add BPF_JLT (<), BPF_JLE (<=), BPF_JSLT (s<), BPF_JSLE (s<=)
counterparts to the eBPF instruction set. Modifying LLVM to
remove the NegateCC() workaround in a PoC patch at [1] and
allowing it to also emit the new instructions resulted in
cilium's BPF programs that are injected into the fast-path to
have a reduced program length in the range of 2-3% (e.g.
accumulated main and tail call sections from one of the object
file reduced from 4864 to 4729 insns), reduced complexity in
the range of 10-30% (e.g. accumulated sections reduced in one
of the cases from 116432 to 88428 insns), and reduced stack
usage in the range of 1-5% (e.g. accumulated sections from one
of the object files reduced from 824 to 784b).

The modification for LLVM will be incorporated in a backwards
compatible way. Plan is for LLVM to have i) a target specific
option to offer a possibility to explicitly enable the extension
by the user (as we have with -m target specific extensions today
for various CPU insns), and ii) have the kernel checked for
presence of the extensions and enable them transparently when
the user is selecting more aggressive options such as -march=native
in a bpf target context. (Other frontends generating BPF byte
code, e.g. ply can probe the kernel directly for its code
generation.)

  [1] https://github.com/borkmann/llvm/tree/bpf-insns

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1d06be1569b1..91da8371a2d0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -30,9 +30,14 @@
 #define BPF_FROM_LE	BPF_TO_LE
 #define BPF_FROM_BE	BPF_TO_BE
 
+/* jmp encodings */
 #define BPF_JNE		0x50	/* jump != */
+#define BPF_JLT		0xa0	/* LT is unsigned, '<' */
+#define BPF_JLE		0xb0	/* LE is unsigned, '<=' */
 #define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
 #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT	0xc0	/* SLT is signed, '<' */
+#define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
-- 
cgit v1.2.3


From 1a6e7c31d71db34d1b9bc3acc87eaea6c2ecc997 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 20 Jun 2017 07:55:53 +0300
Subject: RDMA/netlink: Add netlink device definitions to UAPI

Introduce new defines to rdma_netlink.h, so the RDMA configuration tool
will be able to communicate with RDMA subsystem by using the shared defines.

The addition of new client (NLDEV) revealed the fact that we exposed by
mistake the RDMA_NL_I40IW define which is not backed by any RDMA netlink
by now and it won't be exposed in the future too. So this patch reuses
the value and deletes the old defines.

The NLDEV operates with objects. The struct ib_device has two straightforward
objects: device itself and ports of that device.

This brings us to propose the following commands to work on those objects:
 * RDMA_NLDEV_CMD_{GET,SET,NEW,DEL} - works on ib_device itself
 * RDMA_NLDEV_CMD_PORT_{GET,SET,NEW,DEL} - works on ports of specific ib_device

Those commands receive/return the device index (RDMA_NLDEV_ATTR_DEV_INDEX)
and port index (RDMA_NLDEV_ATTR_PORT_INDEX). For device object accesses,
the RDMA_NLDEV_ATTR_PORT_INDEX will return the maximum number of ports
for specific ib_device and for port access the actual port index.

The port index starts from 1 to follow RDMA/core internal semantics and
the sysfs exposed knobs.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
---
 include/uapi/rdma/rdma_netlink.h | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 02fe8390c18f..a44229fa5eca 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -8,7 +8,7 @@ enum {
 	RDMA_NL_IWCM,
 	RDMA_NL_RSVD,
 	RDMA_NL_LS,	/* RDMA Local Services */
-	RDMA_NL_I40IW,
+	RDMA_NL_NLDEV,	/* RDMA device interface */
 	RDMA_NL_NUM_CLIENTS
 };
 
@@ -222,4 +222,41 @@ struct rdma_nla_ls_gid {
 	__u8		gid[16];
 };
 
+enum rdma_nldev_command {
+	RDMA_NLDEV_CMD_UNSPEC,
+
+	RDMA_NLDEV_CMD_GET, /* can dump */
+	RDMA_NLDEV_CMD_SET,
+	RDMA_NLDEV_CMD_NEW,
+	RDMA_NLDEV_CMD_DEL,
+
+	RDMA_NLDEV_CMD_PORT_GET, /* can dump */
+	RDMA_NLDEV_CMD_PORT_SET,
+	RDMA_NLDEV_CMD_PORT_NEW,
+	RDMA_NLDEV_CMD_PORT_DEL,
+
+	RDMA_NLDEV_NUM_OPS
+};
+
+enum rdma_nldev_attr {
+	/* don't change the order or add anything between, this is ABI! */
+	RDMA_NLDEV_ATTR_UNSPEC,
+
+	/* Identifier for ib_device */
+	RDMA_NLDEV_ATTR_DEV_INDEX,		/* u32 */
+
+	RDMA_NLDEV_ATTR_DEV_NAME,		/* string */
+	/*
+	 * Device index together with port index are identifiers
+	 * for port/link properties.
+	 *
+	 * For RDMA_NLDEV_CMD_GET commamnd, port index will return number
+	 * of available ports in ib_device, while for port specific operations,
+	 * it will be real port index as it appears in sysfs. Port index follows
+	 * sysfs notation and starts from 1 for the first port.
+	 */
+	RDMA_NLDEV_ATTR_PORT_INDEX,		/* u32 */
+
+	RDMA_NLDEV_ATTR_MAX
+};
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From ac50525374315b9b609747f83b07f8dccb06b722 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 20 Jun 2017 14:47:08 +0300
Subject: RDMA/netlink: Expose device and port capability masks

The port capability mask is exposed to user space via sysfs interface,
while device capabilities are available for verbs only.

This patch provides those capabilities through netlink interface.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
---
 include/uapi/rdma/rdma_netlink.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index a44229fa5eca..90de11db6580 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -257,6 +257,11 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_PORT_INDEX,		/* u32 */
 
+	/*
+	 * Device and port capabilities
+	 */
+	RDMA_NLDEV_ATTR_CAP_FLAGS,		/* u64 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 8621a7e3c1c22e18385c9ced1647363884ea2aa1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 27 Jun 2017 16:58:59 +0300
Subject: RDMA/netlink: Export FW version

Add FW version to the device properties exported
by RDMA netlink, to be used by RDMAtool.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 90de11db6580..5159858730b0 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -262,6 +262,10 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_CAP_FLAGS,		/* u64 */
 
+	/*
+	 * FW version
+	 */
+	RDMA_NLDEV_ATTR_FW_VERSION,		/* string */
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 1aaff896ca6b968a639e3e1e72ba6146ba332501 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 28 Jun 2017 14:01:37 +0300
Subject: RDMA/netlink: Export node_guid and sys_image_guid

Add Node GUID and system image GUID to the device properties
exported by RDMA netlink, to be used by RDMAtool.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 5159858730b0..fe3a7429e7a1 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -266,6 +266,19 @@ enum rdma_nldev_attr {
 	 * FW version
 	 */
 	RDMA_NLDEV_ATTR_FW_VERSION,		/* string */
+
+	/*
+	 * Node GUID (in host byte order) associated with the RDMA device.
+	 */
+	RDMA_NLDEV_ATTR_NODE_GUID,			/* u64 */
+
+	/*
+	 * System image GUID (in host byte order) associated with
+	 * this RDMA device and other devices which are part of a
+	 * single system.
+	 */
+	RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,		/* u64 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 12026fbba6af2fc53c3c6cf88bdfc6561986ba82 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 28 Jun 2017 15:05:14 +0300
Subject: RDMA/netlink: Advertise IB subnet prefix

Add IB subnet prefix to the port properties exported
by RDMA netlink.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index fe3a7429e7a1..481003182a35 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -279,6 +279,11 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,		/* u64 */
 
+	/*
+	 * Subnet prefix (in host byte order)
+	 */
+	RDMA_NLDEV_ATTR_SUBNET_PREFIX,		/* u64 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 80a06dd36f79de7007f21f5cbe42181a4e5c7d6d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 28 Jun 2017 15:38:36 +0300
Subject: RDMA/netink: Export lids and sm_lids

According to the IB specification, the LID and SM_LID
are 16-bit wide, but to support OmniPath users, export
it as 32-bit value from the beginning.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 481003182a35..7d5caaf54126 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -284,6 +284,14 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_SUBNET_PREFIX,		/* u64 */
 
+	/*
+	 * Local Identifier (LID),
+	 * According to IB specification, It is 16-bit address assigned
+	 * by the Subnet Manager. Extended to be 32-bit for OmniPath users.
+	 */
+	RDMA_NLDEV_ATTR_LID,			/* u32 */
+	RDMA_NLDEV_ATTR_SM_LID,			/* u32 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 34840fea112d36507c19dc6052b8c6d88bdd9c16 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 28 Jun 2017 15:49:30 +0300
Subject: RDMA/netlink: Export LID mask control (LMC)

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 7d5caaf54126..035706e6b016 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -292,6 +292,11 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_LID,			/* u32 */
 	RDMA_NLDEV_ATTR_SM_LID,			/* u32 */
 
+	/*
+	 * LID mask control (LMC)
+	 */
+	RDMA_NLDEV_ATTR_LMC,			/* u8 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 5654e49db0b2d87c12b6e120b6a830abe3d3921b Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 29 Jun 2017 13:12:45 +0300
Subject: RDMA/netlink: Provide port state and physical link state

Add port state and physical link state to the users of RDMA netlink.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 035706e6b016..c488c3cf361b 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -297,6 +297,9 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_LMC,			/* u8 */
 
+	RDMA_NLDEV_ATTR_PORT_STATE,		/* u8 */
+	RDMA_NLDEV_ATTR_PORT_PHYS_STATE,	/* u8 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 1bb77b8c1d57149ed0aa6825255ead80ae584034 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 29 Jun 2017 16:01:29 +0300
Subject: RDMA/netlink: Export node_type

Add ability to get node_type for RDAM netlink users.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/uapi/rdma/rdma_netlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index c488c3cf361b..861440a87e7c 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -300,6 +300,8 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_PORT_STATE,		/* u8 */
 	RDMA_NLDEV_ATTR_PORT_PHYS_STATE,	/* u8 */
 
+	RDMA_NLDEV_ATTR_DEV_NODE_TYPE,		/* u8 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From bbfb6ce86c9889a5d434e2e603d41e0ce5b552e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 1 Aug 2017 09:58:12 -0700
Subject: drm/i915: Implement .get_format_info() hook for CCS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SKL+ display engine can scan out certain kinds of compressed surfaces
produced by the render engine. This involved telling the display engine
the location of the color control surfae (CCS) which describes which
parts of the main surface are compressed and which are not. The location
of CCS is provided by userspace as just another plane with its own offset.

By providing our own format information for the CCS formats, we should
be able to make framebuffer_check() do the right thing for the CCS
surface as well.

Note that we'll return the same format info for both Y and Yf tiled
format as that's what happens with the non-CCS Y vs. Yf as well. If
desired, we could potentially return a unique pointer for each
pixel_format+tiling+ccs combination, in which case we immediately be
able to tell if any of that stuff changed by just comparing the
pointers. But that does sound a bit wasteful space wise.

v2: Drop the 'dev' argument from the hook
v3: Include the description of the CCS surface layout
v4: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason)
v5: Re-drop 'dev', fix commit message, add missing drm_fourcc.h
    description of CCS layout. (daniels)

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net> (v3)
Reviewed-by: Daniel Stone <daniels@collabora.com>
Signed-off-by: Ville Syrjä <ville.syrjala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Stone <daniels@collabora.com>
---
 include/uapi/drm/drm_fourcc.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 76c9101a7fc6..3ad838d3f93f 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -263,6 +263,26 @@ extern "C" {
  */
 #define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3)
 
+/*
+ * Intel color control surface (CCS) for render compression
+ *
+ * The framebuffer format must be one of the 8:8:8:8 RGB formats.
+ * The main surface will be plane index 0 and must be Y/Yf-tiled,
+ * the CCS will be plane index 1.
+ *
+ * Each CCS tile matches a 1024x512 pixel area of the main surface.
+ * To match certain aspects of the 3D hardware the CCS is
+ * considered to be made up of normal 128Bx32 Y tiles, Thus
+ * the CCS pitch must be specified in multiples of 128 bytes.
+ *
+ * In reality the CCS tile appears to be a 64Bx64 Y tile, composed
+ * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks.
+ * But that fact is not relevant unless the memory is accessed
+ * directly.
+ */
+#define I915_FORMAT_MOD_Y_TILED_CCS	fourcc_mod_code(INTEL, 4)
+#define I915_FORMAT_MOD_Yf_TILED_CCS	fourcc_mod_code(INTEL, 5)
+
 /*
  * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
  *
-- 
cgit v1.2.3


From 077fbac405bfc6d41419ad6c1725804ad4e9887c Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Fri, 11 Aug 2017 02:11:33 +0900
Subject: net: xfrm: support setting an output mark.

On systems that use mark-based routing it may be necessary for
routing lookups to use marks in order for packets to be routed
correctly. An example of such a system is Android, which uses
socket marks to route packets via different networks.

Currently, routing lookups in tunnel mode always use a mark of
zero, making routing incorrect on such systems.

This patch adds a new output_mark element to the xfrm state and
a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output
mark differs from the existing xfrm mark in two ways:

1. The xfrm mark is used to match xfrm policies and states, while
   the xfrm output mark is used to set the mark (and influence
   the routing) of the packets emitted by those states.
2. The existing mark is constrained to be a subset of the bits of
   the originating socket or transformed packet, but the output
   mark is arbitrary and depends only on the state.

The use of a separate mark provides additional flexibility. For
example:

- A packet subject to two transforms (e.g., transport mode inside
  tunnel mode) can have two different output marks applied to it,
  one for the transport mode SA and one for the tunnel mode SA.
- On a system where socket marks determine routing, the packets
  emitted by an IPsec tunnel can be routed based on a mark that
  is determined by the tunnel, not by the marks of the
  unencrypted packets.
- Support for setting the output marks can be introduced without
  breaking any existing setups that employ both mark-based
  routing and xfrm tunnel mode. Simply changing the code to use
  the xfrm mark for routing output packets could xfrm mark could
  change behaviour in a way that breaks these setups.

If the output mark is unspecified or set to zero, the mark is not
set or changed.

Tested: make allyesconfig; make -j64
Tested: https://android-review.googlesource.com/452776
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/xfrm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 2b384ff09fa0..5fe7370a2bef 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -304,6 +304,7 @@ enum xfrm_attr_type_t {
 	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
 	XFRMA_PAD,
 	XFRMA_OFFLOAD_DEV,	/* struct xfrm_state_offload */
+	XFRMA_OUTPUT_MARK,	/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
-- 
cgit v1.2.3


From 34fc75bfc616f1c1fbab56508c3f48f4b97c97ea Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Fri, 11 Aug 2017 16:24:15 +0200
Subject: uapi/linux/quota.h: Do not include linux/errno.h

linux/errno.h is very sensitive to coordination with libc headers.
Nothing in linux/quota.h needs it, so this change allows using
this header in more contexts.

Signed-off-by: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/quota.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index 4d2489ef6f10..f17c9636a859 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -33,7 +33,6 @@
 #ifndef _UAPI_LINUX_QUOTA_
 #define _UAPI_LINUX_QUOTA_
 
-#include <linux/errno.h>
 #include <linux/types.h>
 
 #define __DQUOT_VERSION__	"dquot_6.6.0"
-- 
cgit v1.2.3


From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:53 +0000
Subject: seccomp: Operation for checking if an action is available

Userspace code that needs to check if the kernel supports a given action
may not be able to use the /proc/sys/kernel/seccomp/actions_avail
sysctl. The process may be running in a sandbox and, therefore,
sufficient filesystem access may not be available. This patch adds an
operation to the seccomp(2) syscall that allows userspace code to ask
the kernel if a given action is available.

If the action is supported by the kernel, 0 is returned. If the action
is not supported by the kernel, -1 is returned with errno set to
-EOPNOTSUPP. If this check is attempted on a kernel that doesn't support
this new operation, -1 is returned with errno set to -EINVAL meaning
that userspace code will have the ability to differentiate between the
two error cases.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..aaad61cc46bc 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -11,8 +11,9 @@
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
 /* Valid operations for seccomp syscall. */
-#define SECCOMP_SET_MODE_STRICT	0
-#define SECCOMP_SET_MODE_FILTER	1
+#define SECCOMP_SET_MODE_STRICT		0
+#define SECCOMP_SET_MODE_FILTER		1
+#define SECCOMP_GET_ACTION_AVAIL	2
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
-- 
cgit v1.2.3


From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:56 +0000
Subject: seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW

Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for
all actions except for SECCOMP_RET_ALLOW for the given filter.

SECCOMP_RET_KILL actions are always logged, when "kill" is in the
actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged,
regardless of this flag.

This flag can be used to create noisy filters that result in all
non-allowed actions to be logged. A process may have one noisy filter,
which is loaded with this flag, as well as a quiet filter that's not
loaded with this flag. This allows for the actions in a set of filters
to be selectively conveyed to the admin.

Since a system could have a large number of allocated seccomp_filter
structs, struct packing was taken in consideration. On 64 bit x86, the
new log member takes up one byte of an existing four byte hole in the
struct. On 32 bit x86, the new log member creates a new four byte hole
(unavoidable) and consumes one of those bytes.

Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not
capable of inspecting the audit log to verify that the actions taken in
the filter were logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index aaad61cc46bc..19a611d0712e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,6 +17,7 @@
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
+#define SECCOMP_FILTER_FLAG_LOG		2
 
 /*
  * All BPF programs must return a 32-bit value.
-- 
cgit v1.2.3


From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:57 +0000
Subject: seccomp: Action to log before allowing

Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing
the syscall. At the implementation level, this action is identical to
the existing SECCOMP_RET_ALLOW action. However, it can be very useful when
initially developing a seccomp filter for an application. The developer
can set the default action to be SECCOMP_RET_LOG, maybe mark any
obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the
application through its paces. A list of syscalls that triggered the
default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and
that list can be used to build the syscall whitelist. Finally, the
developer can change the default action to the desired value.

This provides a more friendly experience than seeing the application get
killed, then updating the filter and rebuilding the app, seeing the
application get killed due to a different syscall, then updating the
filter and rebuilding the app, etc.

The functionality is similar to what's supported by the various LSMs.
SELinux has permissive mode, AppArmor has complain mode, SMACK has
bring-up mode, etc.

SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow
while logging is slightly more restrictive than quietly allowing.

Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of
inspecting the audit log to verify that the syscall was logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if action == RET_LOG && RET_LOG in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 19a611d0712e..f94433263e4b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -31,6 +31,7 @@
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
 #define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
-- 
cgit v1.2.3


From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 12:53:18 -0700
Subject: seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD

In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL
to the more accurate SECCOMP_RET_KILL_THREAD.

The existing selftest values are intentionally left as SECCOMP_RET_KILL
just to be sure we're exercising the alias.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index f94433263e4b..5a03f699eb17 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -27,7 +27,8 @@
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
-- 
cgit v1.2.3


From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:01:39 -0700
Subject: seccomp: Introduce SECCOMP_RET_KILL_PROCESS

This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill
an entire process. This cannot yet be reached by seccomp, but it changes
the default-kill behavior (for unknown return values) from kill-thread to
kill-process.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 5a03f699eb17..7e77c92df78a 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -22,18 +22,20 @@
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
- * The upper 16-bits are ordered from least permissive values to most.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x8000000 is negative).
  *
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
-#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
-#define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
-#define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION	0x7fff0000U
-- 
cgit v1.2.3


From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:12:11 -0700
Subject: seccomp: Implement SECCOMP_RET_KILL_PROCESS action
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now, SECCOMP_RET_KILL_THREAD (neé SECCOMP_RET_KILL) kills the
current thread. There have been a few requests for this to kill the entire
process (the thread group). This cannot be just changed (discovered when
adding coredump support since coredumping kills the entire process)
because there are userspace programs depending on the thread-kill
behavior.

Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can
be processed as "-1" by the kernel, below the existing RET_KILL that is
ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand
the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION
mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only
be visible when examining the siginfo in a core dump from a RET_KILL_*,
where it will think it was thread-killed instead of process-killed.

Attempts to introduce this behavior via other ways (filter flags,
seccomp struct flags, masked RET_DATA bits) all come with weird
side-effects and baggage. This change preserves the central behavioral
expectations of the seccomp filter engine without putting too great
a burden on changes needed in userspace to use the new action.

The new action is discoverable by userspace through either the new
actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp
operation. If used without checking for availability, old kernels
will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask
will produce RET_KILL_THREAD).

Cc: Paul Moore <paul@paul-moore.com>
Cc: Fabricio Voznika <fvoznika@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 7e77c92df78a..f6bc1dea3247 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -38,6 +38,7 @@
 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL	0xffff0000U
 #define SECCOMP_RET_ACTION	0x7fff0000U
 #define SECCOMP_RET_DATA	0x0000ffffU
 
-- 
cgit v1.2.3


From 44dd8a989c787e9077745417140aa132bfe45bf5 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Tue, 15 Aug 2017 19:07:53 +0800
Subject: include: uapi: usb: Introduce USB charger type and state definition

Introducing USB charger type and state definition can help
to support USB charging which will be added in USB phy core.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/uapi/linux/usb/charger.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 include/uapi/linux/usb/charger.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h
new file mode 100644
index 000000000000..5f72af35b3ed
--- /dev/null
+++ b/include/uapi/linux/usb/charger.h
@@ -0,0 +1,31 @@
+/*
+ * This file defines the USB charger type and state that are needed for
+ * USB device APIs.
+ */
+
+#ifndef _UAPI__LINUX_USB_CHARGER_H
+#define _UAPI__LINUX_USB_CHARGER_H
+
+/*
+ * USB charger type:
+ * SDP (Standard Downstream Port)
+ * DCP (Dedicated Charging Port)
+ * CDP (Charging Downstream Port)
+ * ACA (Accessory Charger Adapters)
+ */
+enum usb_charger_type {
+	UNKNOWN_TYPE,
+	SDP_TYPE,
+	DCP_TYPE,
+	CDP_TYPE,
+	ACA_TYPE,
+};
+
+/* USB charger state */
+enum usb_charger_state {
+	USB_CHARGER_DEFAULT,
+	USB_CHARGER_PRESENT,
+	USB_CHARGER_ABSENT,
+};
+
+#endif /* _UAPI__LINUX_USB_CHARGER_H */
-- 
cgit v1.2.3


From cf6e7bac6357f0ccca51fcb5eb325e724f6b4c95 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Tue, 15 Aug 2017 15:57:33 +0100
Subject: drm/i915: Add support for drm syncobjs

This commit adds support for waiting on or signaling DRM syncobjs as
part of execbuf.  It does so by hijacking the currently unused cliprects
pointer to instead point to an array of i915_gem_exec_fence structs
which containe a DRM syncobj and a flags parameter which specifies
whether to wait on it or to signal it.  This implementation
theoretically allows for both flags to be set in which case it waits on
the dma_fence that was in the syncobj and then immediately replaces it
with the dma_fence from the current execbuf.

v2:
 - Rebase on new syncobj API
v3:
 - Pull everything out into helpers
 - Do all allocation in gem_execbuffer2
 - Pack the flags in the bottom 2 bits of the drm_syncobj*
v4:
 - Prevent a potential race on syncobj->fence

Testcase: igt/gem_exec_fence/syncobj*
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Link: https://patchwork.freedesktop.org/patch/msgid/1499289202-25441-1-git-send-email-jason.ekstrand@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20170815145733.4562-1-chris@chris-wilson.co.uk
---
 include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ce3833fa1e06..6598fb76d2c2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -435,6 +435,11 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST	 48
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures.  See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -816,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
 	__u64 rsvd2;
 };
 
+struct drm_i915_gem_exec_fence {
+	/**
+	 * User's handle for a drm_syncobj to wait on or signal.
+	 */
+	__u32 handle;
+
+#define I915_EXEC_FENCE_WAIT            (1<<0)
+#define I915_EXEC_FENCE_SIGNAL          (1<<1)
+	__u32 flags;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -830,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 DR1;
 	__u32 DR4;
 	__u32 num_cliprects;
-	/** This is a struct drm_clip_rect *cliprects */
+	/**
+	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
+	 * struct drm_i915_gem_exec_fence *fences.
+	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (7<<0)
 #define I915_EXEC_DEFAULT                (0<<0)
@@ -931,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
  * element).
  */
 #define I915_EXEC_BATCH_FIRST		(1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY   (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
cgit v1.2.3


From 5c1aab1dd5445ed8bdcdbb575abc1b0d7ee5b2e7 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Wed, 9 Aug 2017 19:39:02 -0700
Subject: btrfs: Add zstd support

Add zstd compression and decompression support to BtrFS. zstd at its
fastest level compresses almost as well as zlib, while offering much
faster compression and decompression, approaching lzo speeds.

I benchmarked btrfs with zstd compression against no compression, lzo
compression, and zlib compression. I benchmarked two scenarios. Copying
a set of files to btrfs, and then reading the files. Copying a tarball
to btrfs, extracting it to btrfs, and then reading the extracted files.
After every operation, I call `sync` and include the sync time.
Between every pair of operations I unmount and remount the filesystem
to avoid caching. The benchmark files can be found in the upstream
zstd source repository under
`contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}`
[1] [2].

I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor,
16 GB of RAM, and a SSD.

The first compression benchmark is copying 10 copies of the unzipped
Silesia corpus [3] into a BtrFS filesystem mounted with
`-o compress-force=Method`. The decompression benchmark times how long
it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is
measured by comparing the output of `df` and `du`. See the benchmark file
[1] for details. I benchmarked multiple zstd compression levels, although
the patch uses zstd level 1.

| Method  | Ratio | Compression MB/s | Decompression speed |
|---------|-------|------------------|---------------------|
| None    |  0.99 |              504 |                 686 |
| lzo     |  1.66 |              398 |                 442 |
| zlib    |  2.58 |               65 |                 241 |
| zstd 1  |  2.57 |              260 |                 383 |
| zstd 3  |  2.71 |              174 |                 408 |
| zstd 6  |  2.87 |               70 |                 398 |
| zstd 9  |  2.92 |               43 |                 406 |
| zstd 12 |  2.93 |               21 |                 408 |
| zstd 15 |  3.01 |               11 |                 354 |

The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it
measures the compression ratio, extracts the tar, and deletes the tar.
Then it measures the compression ratio again, and `tar`s the extracted
files into `/dev/null`. See the benchmark file [2] for details.

| Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s)| Read (s) |
|--------|-----------|---------------|----------|------------|----------|
| None   |      0.97 |          0.78 |    0.981 |      5.501 |    8.807 |
| lzo    |      2.06 |          1.38 |    1.631 |      8.458 |    8.585 |
| zlib   |      3.40 |          1.86 |    7.750 |     21.544 |   11.744 |
| zstd 1 |      3.57 |          1.85 |    2.579 |     11.479 |    9.389 |

[1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh
[2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh
[3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
[4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz

zstd source repository: https://github.com/facebook/zstd

Signed-off-by: Nick Terrell <terrelln@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 include/uapi/linux/btrfs.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 9aa74f317747..378230c163d5 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -255,13 +255,7 @@ struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
 #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO	(1ULL << 3)
-/*
- * some patches floated around with a second compression method
- * lets save that incompat here for when they do get in
- * Note we don't actually support it, we're just reserving the
- * number
- */
-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2	(1ULL << 4)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD	(1ULL << 4)
 
 /*
  * older kernels tried to do bigger metadata blocks, but the
-- 
cgit v1.2.3


From fe4007999599c02598c17b643e8de43e487d48e8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 15 Aug 2017 09:09:49 +0200
Subject: ipv6: fib: Provide offload indication using nexthop flags

IPv6 routes currently lack nexthop flags as in IPv4. This has several
implications.

In the forwarding path, it requires us to check the carrier state of the
nexthop device and potentially ignore a linkdown route, instead of
checking for RTNH_F_LINKDOWN.

It also requires capable drivers to use the user facing IPv6-specific
route flags to provide offload indication, instead of using the nexthop
flags as in IPv4.

Add nexthop flags to IPv6 routes in the 40 bytes hole and use it to
provide offload indication instead of the RTF_OFFLOAD flag, which is
removed while it's still not part of any official kernel release.

In the near future we would like to use the field for the
RTNH_F_{LINKDOWN,DEAD} flags, but this change is more involved and might
not be ready in time for the current cycle.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ipv6_route.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index 33e2a5732bd1..d496c02e14bc 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,7 +35,6 @@
 #define RTF_PREF(pref)	((pref) << 27)
 #define RTF_PREF_MASK	0x18000000
 
-#define RTF_OFFLOAD	0x20000000	/* offloaded route		*/
 #define RTF_PCPU	0x40000000	/* read-only: can not be set by user */
 #define RTF_LOCAL	0x80000000
 
-- 
cgit v1.2.3


From 6a1c9510694fe1e901a3b5b53386eac069adcea6 Mon Sep 17 00:00:00 2001
From: Moses Reuben <moses.reuben@amd.com>
Date: Tue, 15 Aug 2017 23:00:20 -0400
Subject: drm/amdkfd: Adding new IOCTL for scratch memory v2

v2:
* Renamed ALLOC_MEMORY_OF_SCRATCH to SET_SCRATCH_BACKING_VA
* Removed size parameter from the ioctl, it was unused
* Removed hole in ioctl number space
* No more call to write_config_static_mem
* Return correct error code from ioctl

Signed-off-by: Moses Reuben <moses.reuben@amd.com>
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index d6833426fdef..1b9c5609d523 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -232,6 +232,12 @@ struct kfd_ioctl_wait_events_args {
 	uint32_t wait_result;		/* from KFD */
 };
 
+struct kfd_ioctl_set_scratch_backing_va_args {
+	uint64_t va_addr;	/* to KFD */
+	uint32_t gpu_id;	/* to KFD */
+	uint32_t pad;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -286,7 +292,10 @@ struct kfd_ioctl_wait_events_args {
 #define AMDKFD_IOC_DBG_WAVE_CONTROL		\
 		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
 
+#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA	\
+		AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x11
+#define AMDKFD_COMMAND_END		0x12
 
 #endif
-- 
cgit v1.2.3


From 5d71dbc3a588690c3d66d76db8cd29973425ce6d Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Tue, 15 Aug 2017 23:00:22 -0400
Subject: drm/amdkfd: Implement image tiling mode support v2

v2: Removed hole in ioctl number space

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 1b9c5609d523..7b4567bacfc2 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -238,6 +238,29 @@ struct kfd_ioctl_set_scratch_backing_va_args {
 	uint32_t pad;
 };
 
+struct kfd_ioctl_get_tile_config_args {
+	/* to KFD: pointer to tile array */
+	uint64_t tile_config_ptr;
+	/* to KFD: pointer to macro tile array */
+	uint64_t macro_tile_config_ptr;
+	/* to KFD: array size allocated by user mode
+	 * from KFD: array size filled by kernel
+	 */
+	uint32_t num_tile_configs;
+	/* to KFD: array size allocated by user mode
+	 * from KFD: array size filled by kernel
+	 */
+	uint32_t num_macro_tile_configs;
+
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t gb_addr_config;	/* from KFD */
+	uint32_t num_banks;		/* from KFD */
+	uint32_t num_ranks;		/* from KFD */
+	/* struct size can be extended later if needed
+	 * without breaking ABI compatibility
+	 */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -295,7 +318,10 @@ struct kfd_ioctl_set_scratch_backing_va_args {
 #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA	\
 		AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
 
+#define AMDKFD_IOC_GET_TILE_CONFIG                                      \
+		AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x12
+#define AMDKFD_COMMAND_END		0x13
 
 #endif
-- 
cgit v1.2.3


From b005fd189cec9407b700599e1e80e0552446ee79 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 15 Aug 2017 22:31:58 -0700
Subject: bpf: introduce new program type for skbs on sockets

A class of programs, run from strparser and soon from a new map type
called sock map, are used with skb as the context but on established
sockets. By creating a specific program type for these we can use
bpf helpers that expect full sockets and get the verifier to ensure
these helpers are not used out of context.

The new type is BPF_PROG_TYPE_SK_SKB. This patch introduces the
infrastructure and type.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91da8371a2d0..2e796e384aeb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -127,6 +127,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
 	BPF_PROG_TYPE_SOCK_OPS,
+	BPF_PROG_TYPE_SK_SKB,
 };
 
 enum bpf_attach_type {
-- 
cgit v1.2.3


From 174a79ff9515f400b9a6115643dafd62a635b7e6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 15 Aug 2017 22:32:47 -0700
Subject: bpf: sockmap with sk redirect support

Recently we added a new map type called dev map used to forward XDP
packets between ports (6093ec2dc313). This patches introduces a
similar notion for sockets.

A sockmap allows users to add participating sockets to a map. When
sockets are added to the map enough context is stored with the
map entry to use the entry with a new helper

  bpf_sk_redirect_map(map, key, flags)

This helper (analogous to bpf_redirect_map in XDP) is given the map
and an entry in the map. When called from a sockmap program, discussed
below, the skb will be sent on the socket using skb_send_sock().

With the above we need a bpf program to call the helper from that will
then implement the send logic. The initial site implemented in this
series is the recv_sock hook. For this to work we implemented a map
attach command to add attributes to a map. In sockmap we add two
programs a parse program and a verdict program. The parse program
uses strparser to build messages and pass them to the verdict program.
The parse programs use the normal strparser semantics. The verdict
program is of type SK_SKB.

The verdict program returns a verdict SK_DROP, or  SK_REDIRECT for
now. Additional actions may be added later. When SK_REDIRECT is
returned, expected when bpf program uses bpf_sk_redirect_map(), the
sockmap logic will consult per cpu variables set by the helper routine
and pull the sock entry out of the sock map. This pattern follows the
existing redirect logic in cls and xdp programs.

This gives the flow,

 recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock
                                                     \
                                                      -> kfree_skb

As an example use case a message based load balancer may use specific
logic in the verdict program to select the sock to send on.

Sample programs are provided in future patches that hopefully illustrate
the user interfaces. Also selftests are in follow-on patches.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2e796e384aeb..7f774769e3f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -110,6 +110,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
 	BPF_MAP_TYPE_HASH_OF_MAPS,
 	BPF_MAP_TYPE_DEVMAP,
+	BPF_MAP_TYPE_SOCKMAP,
 };
 
 enum bpf_prog_type {
@@ -135,11 +136,15 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
 	BPF_CGROUP_SOCK_OPS,
+	BPF_CGROUP_SMAP_INGRESS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
+/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
+#define BPF_SOCKMAP_STRPARSER	(1U << 0)
+
 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
  * to the given target_fd cgroup the descendent cgroup will be able to
  * override effective bpf program that was inherited from this cgroup
@@ -211,6 +216,7 @@ union bpf_attr {
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
 		__u32		attach_flags;
+		__u32		attach_bpf_fd2;
 	};
 
 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -557,6 +563,23 @@ union bpf_attr {
  *     @mode: operation mode (enum bpf_adj_room_mode)
  *     @flags: reserved for future use
  *     Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ *     Redirect skb to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_REDIRECT
+ *
+ * int bpf_sock_map_update(skops, map, key, flags, map_flags)
+ *	@skops: pointer to bpf_sock_ops
+ *	@map: pointer to sockmap to update
+ *	@key: key to insert/update sock in map
+ *	@flags: same flags as map update elem
+ *	@map_flags: sock map specific flags
+ *	   bit 1: Enable strparser
+ *	   other bits: reserved
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -610,7 +633,9 @@ union bpf_attr {
 	FN(set_hash),			\
 	FN(setsockopt),			\
 	FN(skb_adjust_room),		\
-	FN(redirect_map),
+	FN(redirect_map),		\
+	FN(sk_redirect_map),		\
+	FN(sock_map_update),		\
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -747,6 +772,12 @@ struct xdp_md {
 	__u32 data_end;
 };
 
+enum sk_action {
+	SK_ABORTED = 0,
+	SK_DROP,
+	SK_REDIRECT,
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
-- 
cgit v1.2.3


From 8a31db5615667956c513d205cfb06885c3ec6d0b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 15 Aug 2017 22:33:09 -0700
Subject: bpf: add access to sock fields and pkt data from sk_skb programs

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7f774769e3f5..5ecbe812a2cc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -712,6 +712,15 @@ struct __sk_buff {
 	__u32 data;
 	__u32 data_end;
 	__u32 napi_id;
+
+	/* accessed by BPF_PROG_TYPE_sk_skb types */
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
 };
 
 struct bpf_tunnel_key {
-- 
cgit v1.2.3


From 22e4ebb975822833b083533035233d128b30e98f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 28 Jul 2017 16:40:40 -0400
Subject: membarrier: Provide expedited private command

Implement MEMBARRIER_CMD_PRIVATE_EXPEDITED with IPIs using cpumask built
from all runqueues for which current thread's mm is the same as the
thread calling sys_membarrier. It executes faster than the non-expedited
variant (no blocking). It also works on NOHZ_FULL configurations.

Scheduler-wise, it requires a memory barrier before and after context
switching between processes (which have different mm). The memory
barrier before context switch is already present. For the barrier after
context switch:

* Our TSO archs can do RELEASE without being a full barrier. Look at
  x86 spin_unlock() being a regular STORE for example.  But for those
  archs, all atomics imply smp_mb and all of them have atomic ops in
  switch_mm() for mm_cpumask(), and on x86 the CR3 load acts as a full
  barrier.

* From all weakly ordered machines, only ARM64 and PPC can do RELEASE,
  the rest does indeed do smp_mb(), so there the spin_unlock() is a full
  barrier and we're good.

* ARM64 has a very heavy barrier in switch_to(), which suffices.

* PPC just removed its barrier from switch_to(), but appears to be
  talking about adding something to switch_mm(). So add a
  smp_mb__after_unlock_lock() for now, until this is settled on the PPC
  side.

Changes since v3:
- Properly document the memory barriers provided by each architecture.

Changes since v2:
- Address comments from Peter Zijlstra,
- Add smp_mb__after_unlock_lock() after finish_lock_switch() in
  finish_task_switch() to add the memory barrier we need after storing
  to rq->curr. This is much simpler than the previous approach relying
  on atomic_dec_and_test() in mmdrop(), which actually added a memory
  barrier in the common case of switching between userspace processes.
- Return -EINVAL when MEMBARRIER_CMD_SHARED is used on a nohz_full
  kernel, rather than having the whole membarrier system call returning
  -ENOSYS. Indeed, CMD_PRIVATE_EXPEDITED is compatible with nohz_full.
  Adapt the CMD_QUERY mask accordingly.

Changes since v1:
- move membarrier code under kernel/sched/ because it uses the
  scheduler runqueue,
- only add the barrier when we switch from a kernel thread. The case
  where we switch from a user-space thread is already handled by
  the atomic_dec_and_test() in mmdrop().
- add a comment to mmdrop() documenting the requirement on the implicit
  memory barrier.

CC: Peter Zijlstra <peterz@infradead.org>
CC: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
CC: Boqun Feng <boqun.feng@gmail.com>
CC: Andrew Hunter <ahh@google.com>
CC: Maged Michael <maged.michael@gmail.com>
CC: gromer@google.com
CC: Avi Kivity <avi@scylladb.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Dave Watson <davejwatson@fb.com>
---
 include/uapi/linux/membarrier.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index e0b108bd2624..6d47b3249d8a 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -40,14 +40,33 @@
  *                          (non-running threads are de facto in such a
  *                          state). This covers threads from all processes
  *                          running on the system. This command returns 0.
+ * @MEMBARRIER_CMD_PRIVATE_EXPEDITED:
+ *                          Execute a memory barrier on each running
+ *                          thread belonging to the same process as the current
+ *                          thread. Upon return from system call, the
+ *                          caller thread is ensured that all its running
+ *                          threads siblings have passed through a state
+ *                          where all memory accesses to user-space
+ *                          addresses match program order between entry
+ *                          to and return from the system call
+ *                          (non-running threads are de facto in such a
+ *                          state). This only covers threads from the
+ *                          same processes as the caller thread. This
+ *                          command returns 0. The "expedited" commands
+ *                          complete faster than the non-expedited ones,
+ *                          they never block, but have the downside of
+ *                          causing extra overhead.
  *
  * Command to be passed to the membarrier system call. The commands need to
  * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
  * the value 0.
  */
 enum membarrier_cmd {
-	MEMBARRIER_CMD_QUERY = 0,
-	MEMBARRIER_CMD_SHARED = (1 << 0),
+	MEMBARRIER_CMD_QUERY			= 0,
+	MEMBARRIER_CMD_SHARED			= (1 << 0),
+	/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
+	/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
+	MEMBARRIER_CMD_PRIVATE_EXPEDITED	= (1 << 3),
 };
 
 #endif /* _UAPI_LINUX_MEMBARRIER_H */
-- 
cgit v1.2.3


From 0888e372c37fa31882c8ed89fb2f8188b08b6718 Mon Sep 17 00:00:00 2001
From: "Levin, Alexander (Sasha Levin)" <alexander.levin@one.verizon.com>
Date: Thu, 17 Aug 2017 00:35:11 +0000
Subject: net: inet: diag: expose sockets cgroup classid

This is useful for directly looking up a task based on class id rather than
having to scan through all open file descriptors.

Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index bbe201047df6..678496897a68 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -142,6 +142,7 @@ enum {
 	INET_DIAG_PAD,
 	INET_DIAG_MARK,
 	INET_DIAG_BBRINFO,
+	INET_DIAG_CLASS_ID,
 	__INET_DIAG_MAX,
 };
 
-- 
cgit v1.2.3


From 99d1712bc41c7c9a5a473c104a4ad15427757b22 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Aug 2017 15:15:29 +0200
Subject: netfilter: exthdr: tcp option set support

This allows setting 2 and 4 byte quantities in the tcp option space.
Main purpose is to allow native replacement for xt_TCPMSS to
work around pmtu blackholes.

Writes to kind and len are now allowed at the moment, it does not seem
useful to do this as it causes corruption of the tcp option space.

We can always lift this restriction later if a use-case appears.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index be25cf69295b..40fd199f7531 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -732,7 +732,8 @@ enum nft_exthdr_op {
  * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
  * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
  * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32)
- * @NFTA_EXTHDR_OP: option match type (NLA_U8)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U32)
+ * @NFTA_EXTHDR_SREG: option match type (NLA_U32)
  */
 enum nft_exthdr_attributes {
 	NFTA_EXTHDR_UNSPEC,
@@ -742,6 +743,7 @@ enum nft_exthdr_attributes {
 	NFTA_EXTHDR_LEN,
 	NFTA_EXTHDR_FLAGS,
 	NFTA_EXTHDR_OP,
+	NFTA_EXTHDR_SREG,
 	__NFTA_EXTHDR_MAX
 };
 #define NFTA_EXTHDR_MAX		(__NFTA_EXTHDR_MAX - 1)
-- 
cgit v1.2.3


From 6b5dc98e8fac041a3decfc3186e08c1c570ea691 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Aug 2017 15:48:04 +0200
Subject: netfilter: rt: add support to fetch path mss

to be used in combination with tcp option set support to mimic
iptables TCPMSS --clamp-mss-to-pmtu.

v2: Eric Dumazet points out dst must be initialized.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 40fd199f7531..b49da72efa68 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -811,11 +811,13 @@ enum nft_meta_keys {
  * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
  * @NFT_RT_NEXTHOP4: routing nexthop for IPv4
  * @NFT_RT_NEXTHOP6: routing nexthop for IPv6
+ * @NFT_RT_TCPMSS: fetch current path tcp mss
  */
 enum nft_rt_keys {
 	NFT_RT_CLASSID,
 	NFT_RT_NEXTHOP4,
 	NFT_RT_NEXTHOP6,
+	NFT_RT_TCPMSS,
 };
 
 /**
-- 
cgit v1.2.3


From 96eabe7a40aa17e613cf3db2c742ee8b1fc764d0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 18 Aug 2017 11:28:00 -0700
Subject: bpf: Allow selecting numa node during map creation

The current map creation API does not allow to provide the numa-node
preference.  The memory usually comes from where the map-creation-process
is running.  The performance is not ideal if the bpf_prog is known to
always run in a numa node different from the map-creation-process.

One of the use case is sharding on CPU to different LRU maps (i.e.
an array of LRU maps).  Here is the test result of map_perf_test on
the INNER_LRU_HASH_PREALLOC test if we force the lru map used by
CPU0 to be allocated from a remote numa node:

[ The machine has 20 cores. CPU0-9 at node 0. CPU10-19 at node 1 ]

># taskset -c 10 ./map_perf_test 512 8 1260000 8000000
5:inner_lru_hash_map_perf pre-alloc 1628380 events per sec
4:inner_lru_hash_map_perf pre-alloc 1626396 events per sec
3:inner_lru_hash_map_perf pre-alloc 1626144 events per sec
6:inner_lru_hash_map_perf pre-alloc 1621657 events per sec
2:inner_lru_hash_map_perf pre-alloc 1621534 events per sec
1:inner_lru_hash_map_perf pre-alloc 1620292 events per sec
7:inner_lru_hash_map_perf pre-alloc 1613305 events per sec
0:inner_lru_hash_map_perf pre-alloc 1239150 events per sec  #<<<

After specifying numa node:
># taskset -c 10 ./map_perf_test 512 8 1260000 8000000
5:inner_lru_hash_map_perf pre-alloc 1629627 events per sec
3:inner_lru_hash_map_perf pre-alloc 1628057 events per sec
1:inner_lru_hash_map_perf pre-alloc 1623054 events per sec
6:inner_lru_hash_map_perf pre-alloc 1616033 events per sec
2:inner_lru_hash_map_perf pre-alloc 1614630 events per sec
4:inner_lru_hash_map_perf pre-alloc 1612651 events per sec
7:inner_lru_hash_map_perf pre-alloc 1609337 events per sec
0:inner_lru_hash_map_perf pre-alloc 1619340 events per sec #<<<

This patch adds one field, numa_node, to the bpf_attr.  Since numa node 0
is a valid node, a new flag BPF_F_NUMA_NODE is also added.  The numa_node
field is honored if and only if the BPF_F_NUMA_NODE flag is set.

Numa node selection is not supported for percpu map.

This patch does not change all the kmalloc.  F.e.
'htab = kzalloc()' is not changed since the object
is small enough to stay in the cache.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5ecbe812a2cc..843818dff96d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,6 +165,7 @@ enum bpf_attach_type {
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
 
+/* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
 /* Instead of having one common LRU list in the
  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
@@ -173,6 +174,8 @@ enum bpf_attach_type {
  * across different LRU lists.
  */
 #define BPF_F_NO_COMMON_LRU	(1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE		(1U << 2)
 
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -180,8 +183,13 @@ union bpf_attr {
 		__u32	key_size;	/* size of key in bytes */
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
-		__u32	map_flags;	/* prealloc or not */
+		__u32	map_flags;	/* BPF_MAP_CREATE related
+					 * flags defined above.
+					 */
 		__u32	inner_map_fd;	/* fd pointing to the inner map */
+		__u32	numa_node;	/* numa node (effective only if
+					 * BPF_F_NUMA_NODE is set).
+					 */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
-- 
cgit v1.2.3


From 84e54fe0a5eaed696dee4019c396f8396f5a908b Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Tue, 22 Aug 2017 09:40:28 -0700
Subject: gre: introduce native tunnel support for ERSPAN

The patch adds ERSPAN type II tunnel support.  The implementation
is based on the draft at [1].  One of the purposes is for Linux
box to be able to receive ERSPAN monitoring traffic sent from
the Cisco switch, by creating a ERSPAN tunnel device.
In addition, the patch also adds ERSPAN TX, so Linux virtual
switch can redirect monitored traffic to the ERSPAN tunnel device.
The traffic will be encapsulated into ERSPAN and sent out.

The implementation reuses tunnel key as ERSPAN session ID, and
field 'erspan' as ERSPAN Index fields:
./ip link add dev ers11 type erspan seq key 100 erspan 123 \
			local 172.16.1.200 remote 172.16.1.100

To use the above device as ERSPAN receiver, configure
Nexus 5000 switch as below:

monitor session 100 type erspan-source
  erspan-id 123
  vrf default
  destination ip 172.16.1.200
  source interface Ethernet1/11 both
  source interface Ethernet1/12 both
  no shut
monitor erspan origin ip-address 172.16.1.100 global

[1] https://tools.ietf.org/html/draft-foschiano-erspan-01
[2] iproute2 patch: http://marc.info/?l=linux-netdev&m=150306086924951&w=2
[3] test script: http://marc.info/?l=linux-netdev&m=150231021807304&w=2

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Meenakshi Vohra <mvohra@vmware.com>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h  | 1 +
 include/uapi/linux/if_tunnel.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5bc9bfd816b7..efeb1190c2ca 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -66,6 +66,7 @@
 #define ETH_P_ATALK	0x809B		/* Appletalk DDP		*/
 #define ETH_P_AARP	0x80F3		/* Appletalk AARP		*/
 #define ETH_P_8021Q	0x8100          /* 802.1Q VLAN Extended Header  */
+#define ETH_P_ERSPAN	0x88BE		/* ERSPAN type II		*/
 #define ETH_P_IPX	0x8137		/* IPX over DIX			*/
 #define ETH_P_IPV6	0x86DD		/* IPv6 over bluebook		*/
 #define ETH_P_PAUSE	0x8808		/* IEEE Pause frames. See 802.3 31B */
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 6792d1967d31..2e520883c054 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -134,6 +134,7 @@ enum {
 	IFLA_GRE_COLLECT_METADATA,
 	IFLA_GRE_IGNORE_DF,
 	IFLA_GRE_FWMARK,
+	IFLA_GRE_ERSPAN_INDEX,
 	__IFLA_GRE_MAX,
 };
 
-- 
cgit v1.2.3


From 5cdcf4c6a638591ec0e98c57404a19e7f9997567 Mon Sep 17 00:00:00 2001
From: Martijn Coenen <maco@android.com>
Date: Fri, 28 Jul 2017 13:56:06 +0200
Subject: ANDROID: binder: add padding to binder_fd_array_object.

binder_fd_array_object starts with a 4-byte header,
followed by a few fields that are 8 bytes when
ANDROID_BINDER_IPC_32BIT=N.

This can cause alignment issues in a 64-bit kernel
with a 32-bit userspace, as on x86_32 an 8-byte primitive
may be aligned to a 4-byte address. Pad with a __u32
to fix this.

Signed-off-by: Martijn Coenen <maco@android.com>
Cc: stable <stable@vger.kernel.org> # 4.11+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/android/binder.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 51f891fb1b18..7668b5791c91 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -132,6 +132,7 @@ enum {
 
 /* struct binder_fd_array_object - object describing an array of fds in a buffer
  * @hdr:		common header structure
+ * @pad:		padding to ensure correct alignment
  * @num_fds:		number of file descriptors in the buffer
  * @parent:		index in offset array to buffer holding the fd array
  * @parent_offset:	start offset of fd array in the buffer
@@ -152,6 +153,7 @@ enum {
  */
 struct binder_fd_array_object {
 	struct binder_object_header	hdr;
+	__u32				pad;
 	binder_size_t			num_fds;
 	binder_size_t			parent;
 	binder_size_t			parent_offset;
-- 
cgit v1.2.3


From 1e6ec9ea89d30739b9447c1860fcb07fc29f3aef Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 23 Aug 2017 14:54:59 -0700
Subject: Revert "loop: support 4k physical blocksize"

There's some stuff still up in the air, let's not get stuck with a
subpar ABI. I'll follow up with something better for 4.14.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/loop.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index a3960f98679c..c8125ec1f4f2 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,7 +22,6 @@ enum {
 	LO_FLAGS_AUTOCLEAR	= 4,
 	LO_FLAGS_PARTSCAN	= 8,
 	LO_FLAGS_DIRECT_IO	= 16,
-	LO_FLAGS_BLOCKSIZE	= 32,
 };
 
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
@@ -60,8 +59,6 @@ struct loop_info64 {
 	__u64		   lo_init[2];
 };
 
-#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
-
 /*
  * Loop filter types
  */
-- 
cgit v1.2.3


From ecda85e70277ef24e44a1f6bc00243cebd19f985 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 16 Aug 2017 19:31:57 +0200
Subject: x86/lguest: Remove lguest support

Lguest seems to be rather unused these days. It has seen only patches
ensuring it still builds the last two years and its official state is
"Odd Fixes".

Remove it in order to be able to clean up the paravirt code.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: boris.ostrovsky@oracle.com
Cc: lguest@lists.ozlabs.org
Cc: rusty@rustcorp.com.au
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20170816173157.8633-3-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/virtio_ring.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index c07295969b7e..6d5d5faa989b 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -1,7 +1,7 @@
 #ifndef _UAPI_LINUX_VIRTIO_RING_H
 #define _UAPI_LINUX_VIRTIO_RING_H
-/* An interface for efficient virtio implementation, currently for use by KVM
- * and lguest, but hopefully others soon.  Do NOT change this since it will
+/* An interface for efficient virtio implementation, currently for use by KVM,
+ * but hopefully others soon.  Do NOT change this since it will
  * break existing servers and clients.
  *
  * This header is BSD licensed so anyone can use the definitions to implement
-- 
cgit v1.2.3


From ea5311c7e752dbec9bfbdd79992a8772b37f32fa Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 10 Aug 2017 10:54:31 -0600
Subject: PCI: Fix PCIe capability sizes

PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 defines the size of the PCIe capability
structure for v1 devices with link, but we also have a need in the vfio
code for sizing the capability for devices without link, such as Root
Complex Integrated Endpoints.  Create a separate define for this ending the
structure before the link fields.

Additionally, this reveals that PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 is currently
incorrect, ending the capability length before the v2 link fields.  Rename
this to specify an RC Integrated Endpoint (no link) capability length and
move PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to include the link fields as we have
for the v1 version.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
[bhelgaas: add "_" in "PCI_CAP_EXP_RC ENDPOINT_SIZEOF_V2 44"]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 include/uapi/linux/pci_regs.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index c22d3ebaca20..e185d2d39ea6 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -513,6 +513,7 @@
 #define  PCI_EXP_DEVSTA_URD	0x0008	/* Unsupported Request Detected */
 #define  PCI_EXP_DEVSTA_AUXPD	0x0010	/* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND	0x0020	/* Transactions Pending */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1	12	/* v1 endpoints without link end here */
 #define PCI_EXP_LNKCAP		12	/* Link Capabilities */
 #define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
@@ -556,7 +557,7 @@
 #define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
 #define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
 #define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1	20	/* v1 endpoints end here */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1	20	/* v1 endpoints with link end here */
 #define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
 #define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
 #define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
@@ -639,7 +640,7 @@
 #define  PCI_EXP_DEVCTL2_OBFF_MSGB_EN	0x4000	/* Enable OBFF Message type B */
 #define  PCI_EXP_DEVCTL2_OBFF_WAKE_EN	0x6000	/* OBFF using WAKE# signaling */
 #define PCI_EXP_DEVSTA2		42	/* Device Status 2 */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	44	/* v2 endpoints end here */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2	44	/* v2 endpoints without link end here */
 #define PCI_EXP_LNKCAP2		44	/* Link Capabilities 2 */
 #define  PCI_EXP_LNKCAP2_SLS_2_5GB	0x00000002 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_5_0GB	0x00000004 /* Supported Speed 5.0GT/s */
@@ -647,6 +648,7 @@
 #define  PCI_EXP_LNKCAP2_CROSSLINK	0x00000100 /* Crosslink supported */
 #define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
 #define PCI_EXP_LNKSTA2		50	/* Link Status 2 */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	52	/* v2 endpoints with link end here */
 #define PCI_EXP_SLTCAP2		52	/* Slot Capabilities 2 */
 #define PCI_EXP_SLTCTL2		56	/* Slot Control 2 */
 #define PCI_EXP_SLTSTA2		58	/* Slot Status 2 */
-- 
cgit v1.2.3


From f20c4ea49ec4708de97248927ac6138c2d14eba9 Mon Sep 17 00:00:00 2001
From: Dongdong Liu <liudongdong3@huawei.com>
Date: Sat, 19 Aug 2017 17:07:20 +0800
Subject: PCI/DPC: Add eDPC support

Add eDPC support. Get and print the RP PIO error information when the
trigger condition is RP PIO error.

For more information on eDPC, please see PCI Express Base Specification
Revision 3.1, section 6.2.10.3, or view the PCI-SIG eDPC ECN here:
https://pcisig.com/sites/default/files/specification_documents/ECN_Enhanced_DPC_2012-11-19_final.pdf

Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
---
 include/uapi/linux/pci_regs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index c22d3ebaca20..1ce96275531c 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -967,6 +967,7 @@
 #define  PCI_EXP_DPC_CAP_RP_EXT		0x20	/* Root Port Extensions for DPC */
 #define  PCI_EXP_DPC_CAP_POISONED_TLP	0x40	/* Poisoned TLP Egress Blocking Supported */
 #define  PCI_EXP_DPC_CAP_SW_TRIGGER	0x80	/* Software Triggering Supported */
+#define  PCI_EXP_DPC_RP_PIO_LOG_SIZE	0xF00	/* RP PIO log size */
 #define  PCI_EXP_DPC_CAP_DL_ACTIVE	0x1000	/* ERR_COR signal on DL_Active supported */
 
 #define PCI_EXP_DPC_CTL			6	/* DPC control */
@@ -980,6 +981,15 @@
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
+#define PCI_EXP_DPC_RP_PIO_STATUS	 0x0C	/* RP PIO Status */
+#define PCI_EXP_DPC_RP_PIO_MASK		 0x10	/* RP PIO MASK */
+#define PCI_EXP_DPC_RP_PIO_SEVERITY	 0x14	/* RP PIO Severity */
+#define PCI_EXP_DPC_RP_PIO_SYSERROR	 0x18	/* RP PIO SysError */
+#define PCI_EXP_DPC_RP_PIO_EXCEPTION	 0x1C	/* RP PIO Exception */
+#define PCI_EXP_DPC_RP_PIO_HEADER_LOG	 0x20	/* RP PIO Header Log */
+#define PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG	 0x30	/* RP PIO ImpSpec Log */
+#define PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG 0x34	/* RP PIO TLP Prefix Log */
+
 /* Precision Time Measurement */
 #define PCI_PTM_CAP			0x04	    /* PTM Capability */
 #define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
-- 
cgit v1.2.3


From 1177009131bee310421f5c04c43d3777cbacbdc8 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 24 Aug 2017 08:39:59 +0200
Subject: devlink: Add Ethernet header for dpipe

This will be used by the IPv4 host table which will be introduced in the
following patches. This header is global and can be reused by many
drivers.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b0e807ac53bb..a855f8dcc8ee 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -226,4 +226,12 @@ enum devlink_dpipe_action_type {
 	DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY,
 };
 
+enum devlink_dpipe_field_ethernet_id {
+	DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+};
+
+enum devlink_dpipe_header_id {
+	DEVLINK_DPIPE_HEADER_ETHERNET,
+};
+
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
-- 
cgit v1.2.3


From 3fb886ecea93605a8ea14e258ff3158b8966781e Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 24 Aug 2017 08:40:00 +0200
Subject: devlink: Add IPv4 header for dpipe

This will be used by the IPv4 host table which will be introduced in the
following patches. This header is global and can be reused by many
drivers.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index a855f8dcc8ee..6c172548589d 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -230,8 +230,13 @@ enum devlink_dpipe_field_ethernet_id {
 	DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
 };
 
+enum devlink_dpipe_field_ipv4_id {
+	DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+};
+
 enum devlink_dpipe_header_id {
 	DEVLINK_DPIPE_HEADER_ETHERNET,
+	DEVLINK_DPIPE_HEADER_IPV4,
 };
 
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
-- 
cgit v1.2.3


From 55f2467cd717241dd941153d4db1e23c91aecf98 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 17 Aug 2017 15:50:35 +0300
Subject: RDMA/mlx4: Fix create qp command alignment

Avoid extra padding by replacing the order of inl_recv_sz and reserved,
otherwise 'mlx4_ib_create_qp' structure might be larger than legacy user
input leading to copy of some garbage data from the user space buffer.

Fixes: ea30b966f7dd ('IB/mlx4: Add inline-receive support')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index d915cab37ec3..21cce1a4c4dd 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -111,8 +111,8 @@ struct mlx4_ib_create_qp {
 	__u8	log_sq_bb_count;
 	__u8	log_sq_stride;
 	__u8	sq_no_prefetch;
-	__u32	inl_recv_sz;
 	__u8	reserved;
+	__u32	inl_recv_sz;
 };
 
 struct mlx4_ib_create_wq {
-- 
cgit v1.2.3


From 078b3573030346df0cdc46d798c0f434dc53c2cc Mon Sep 17 00:00:00 2001
From: Guy Levi <guyle@mellanox.com>
Date: Thu, 17 Aug 2017 15:50:47 +0300
Subject: IB/mlx4: Fix struct mlx4_ib_create_wq alignment

The mlx4 ABI defines to have structures with alignment of 64B.

Fixes: 400b1ebcfe31 ("IB/mlx4: Add support for WQ related verbs")
Signed-off-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 21cce1a4c4dd..0e10102861b5 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -121,7 +121,6 @@ struct mlx4_ib_create_wq {
 	__u8	log_range_size;
 	__u8	reserved[3];
 	__u32   comp_mask;
-	__u32   reserved1;
 };
 
 struct mlx4_ib_modify_wq {
-- 
cgit v1.2.3


From b23673f86fd0d9ccbc088e88e29899b4d3f0f055 Mon Sep 17 00:00:00 2001
From: Guy Levi <guyle@mellanox.com>
Date: Thu, 17 Aug 2017 15:50:48 +0300
Subject: IB/mlx4: Remove redundant attribute in mlx4_ib_create_qp_rss struct

rx_key_len is not in use and needs to be removed.

Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP")
Signed-off-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx4-abi.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 0e10102861b5..c55f60e05f86 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -98,8 +98,7 @@ struct mlx4_ib_create_srq_resp {
 struct mlx4_ib_create_qp_rss {
 	__u64   rx_hash_fields_mask;
 	__u8    rx_hash_function;
-	__u8    rx_key_len;
-	__u8    reserved[6];
+	__u8    reserved[7];
 	__u8    rx_hash_key[40];
 	__u32   comp_mask;
 	__u32   reserved1;
-- 
cgit v1.2.3


From 96dc3fc5f1d66b20cdf839d571c7b907e08d5d00 Mon Sep 17 00:00:00 2001
From: Noa Osherovich <noaos@mellanox.com>
Date: Thu, 17 Aug 2017 15:52:28 +0300
Subject: IB/mlx5: Expose software parsing for Raw Ethernet QP

Software parsing (SWP) is a feature that can be used to instruct the
device to stop using its internal parser and to parse packets on the
transmit path according to offsets set for each packets.

Through this feature, the device allows the handling of checksum and
LSO by the hardware according to the location of IP and TCP/UDP
headers.

Enable SW parsing on Raw Ethernet send queue by default if firmware
supports it and report these capabilities to user space.

Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx5-abi.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 0b3d30837a9f..64d398e662cd 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -168,6 +168,22 @@ struct mlx5_packet_pacing_caps {
 	__u32 reserved;
 };
 
+enum mlx5_ib_sw_parsing_offloads {
+	MLX5_IB_SW_PARSING = 1 << 0,
+	MLX5_IB_SW_PARSING_CSUM = 1 << 1,
+	MLX5_IB_SW_PARSING_LSO = 1 << 2,
+};
+
+struct mlx5_ib_sw_parsing_caps {
+	__u32 sw_parsing_offloads; /* enum mlx5_ib_sw_parsing_offloads */
+
+	/* Corresponding bit will be set if qp type from
+	 * 'enum ib_qp_type' is supported, e.g.
+	 * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+	 */
+	__u32 supported_qpts;
+};
+
 struct mlx5_ib_query_device_resp {
 	__u32	comp_mask;
 	__u32	response_length;
@@ -177,6 +193,7 @@ struct mlx5_ib_query_device_resp {
 	struct	mlx5_packet_pacing_caps packet_pacing_caps;
 	__u32	mlx5_ib_support_multi_pkt_send_wqes;
 	__u32	reserved;
+	struct mlx5_ib_sw_parsing_caps sw_parsing_caps;
 };
 
 struct mlx5_ib_create_cq {
-- 
cgit v1.2.3


From 795b609c8b59f8f20fa9d72bf8b4ae3b8aa5582c Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Thu, 17 Aug 2017 15:52:34 +0300
Subject: IB/mlx5: Allow posting multi packet send WQEs if hardware supports

Set the field to allow posting multi packet send WQEs if hardware
supports this feature. This doesn't mean the send WQEs will be for
multi packet unless the send WQE was prepared according to multi
packet send WQE format.

User space shall use flag MLX5_IB_ALLOW_MPW to check if hardware
supports MPW and allows MPW in SQ context.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx5-abi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 64d398e662cd..a61a512e5747 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -168,6 +168,11 @@ struct mlx5_packet_pacing_caps {
 	__u32 reserved;
 };
 
+enum mlx5_ib_mpw_caps {
+	MPW_RESERVED		= 1 << 0,
+	MLX5_IB_ALLOW_MPW	= 1 << 1,
+};
+
 enum mlx5_ib_sw_parsing_offloads {
 	MLX5_IB_SW_PARSING = 1 << 0,
 	MLX5_IB_SW_PARSING_CSUM = 1 << 1,
-- 
cgit v1.2.3


From 050da902adde8faf6b1bef15ac4876ae145358f4 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Thu, 17 Aug 2017 15:52:35 +0300
Subject: IB/mlx5: Report mlx5 enhanced multi packet WQE capability

Expose enhanced multi packet WQE capability to user space through
query_device by uhw.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/mlx5-abi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index a61a512e5747..1791bf123ba9 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -171,6 +171,7 @@ struct mlx5_packet_pacing_caps {
 enum mlx5_ib_mpw_caps {
 	MPW_RESERVED		= 1 << 0,
 	MLX5_IB_ALLOW_MPW	= 1 << 1,
+	MLX5_IB_SUPPORT_EMPW	= 1 << 2,
 };
 
 enum mlx5_ib_sw_parsing_offloads {
-- 
cgit v1.2.3


From 6ae5fa61d27dcb055f4198bcf6c8dbbf1bb33f52 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 16 Aug 2017 15:21:54 -0700
Subject: perf/x86: Fix data source decoding for Skylake

Skylake changed the encoding of the PEBS data source field.
Some combinations are not available anymore, but some new cases
e.g. for L4 cache hit are added.

Fix up the conversion table for Skylake, similar as had been done
for Nehalem.

On Skylake server the encoding for L4 actually means persistent
memory. Handle this case too.

To properly describe it in the abstracted perf format I had to add
some new fields. Since a hit can have only one level add a new
field that is an enumeration, not a bit field to describe
the level. It can describe any level. Some numbers are also
used to describe PMEM and LFB.

Also add a new generic remote flag that can be combined with
the generic level to signify a remote cache.

And there is an extension field for the snoop indication to handle
the Forward state.

I didn't add a generic flag for hops because it's not needed
for Skylake.

I changed the existing encodings for older CPUs to also fill in the
new level and remote fields.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: http://lkml.kernel.org/r/20170816222156.19953-3-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 642db5fa3286..2a37ae925d85 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -954,14 +954,20 @@ union perf_mem_data_src {
 			mem_snoop:5,	/* snoop mode */
 			mem_lock:2,	/* lock instr */
 			mem_dtlb:7,	/* tlb access */
-			mem_rsvd:31;
+			mem_lvl_num:4,	/* memory hierarchy level number */
+			mem_remote:1,   /* remote */
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_rsvd:24;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:31,
+		__u64	mem_rsvd:24,
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_remote:1,   /* remote */
+			mem_lvl_num:4,	/* memory hierarchy level number */
 			mem_dtlb:7,	/* tlb access */
 			mem_lock:2,	/* lock instr */
 			mem_snoop:5,	/* snoop mode */
@@ -998,6 +1004,22 @@ union perf_mem_data_src {
 #define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
 #define PERF_MEM_LVL_SHIFT	5
 
+#define PERF_MEM_REMOTE_REMOTE	0x01  /* Remote */
+#define PERF_MEM_REMOTE_SHIFT	37
+
+#define PERF_MEM_LVLNUM_L1	0x01 /* L1 */
+#define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
+#define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
+#define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
+/* 5-0xa available */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
+#define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM	0x0e /* PMEM */
+#define PERF_MEM_LVLNUM_NA	0x0f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT	33
+
 /* snoop mode */
 #define PERF_MEM_SNOOP_NA	0x01 /* not available */
 #define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
@@ -1006,6 +1028,10 @@ union perf_mem_data_src {
 #define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
 #define PERF_MEM_SNOOP_SHIFT	19
 
+#define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
+/* 1 free */
+#define PERF_MEM_SNOOPX_SHIFT	37
+
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
 #define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
-- 
cgit v1.2.3


From f44d85389e17b2e960620c1c6d89bda978a11f2b Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Thu, 24 Aug 2017 16:08:14 +0100
Subject: drm: rename u32 in __u32 in uapi

All other fields use __

Cc: Ben Widawsky <ben@bwidawsk.net>
Fixes: db1689aa61b ("drm: Create a format/modifier blob")
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Daniel Stone <daniels@collabora.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170824150814.5878-1-lionel.g.landwerlin@intel.com
---
 include/uapi/drm/drm_mode.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index a2bb7161f020..54fc38c3c3f1 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -715,24 +715,24 @@ struct drm_mode_atomic {
 struct drm_format_modifier_blob {
 #define FORMAT_BLOB_CURRENT 1
 	/* Version of this blob format */
-	u32 version;
+	__u32 version;
 
 	/* Flags */
-	u32 flags;
+	__u32 flags;
 
 	/* Number of fourcc formats supported */
-	u32 count_formats;
+	__u32 count_formats;
 
 	/* Where in this blob the formats exist (in bytes) */
-	u32 formats_offset;
+	__u32 formats_offset;
 
 	/* Number of drm_format_modifiers */
-	u32 count_modifiers;
+	__u32 count_modifiers;
 
 	/* Where in this blob the modifiers exist (in bytes) */
-	u32 modifiers_offset;
+	__u32 modifiers_offset;
 
-	/* u32 formats[] */
+	/* __u32 formats[] */
 	/* struct drm_format_modifier modifiers[] */
 };
 
-- 
cgit v1.2.3


From 3d52a7924172adf651eda8f1c95ff38146059307 Mon Sep 17 00:00:00 2001
From: Shreyas NC <shreyas.nc@intel.com>
Date: Wed, 23 Aug 2017 19:33:49 +0530
Subject: ASoC: Intel: uapi: Add new tokens for module common data

The module private data can be modelled independent of its instances so
that it can be reused by the module instances. So move module data to
common manifest which can be referenced by the module instances.

This requires new tokens to be defined to accommodate these changes. The
new tokens will specify buffer sizes, DSP cycles and respective indexes
corresponding to the pcm params in the topology manifest so that driver
need not compute them.

Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Signed-off-by: Guneshwor Singh <guneshwor.o.singh@intel.com>
Acked-By: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/snd_sst_tokens.h | 92 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/snd_sst_tokens.h b/include/uapi/sound/snd_sst_tokens.h
index dedb2056160d..f691e421f5e8 100644
--- a/include/uapi/sound/snd_sst_tokens.h
+++ b/include/uapi/sound/snd_sst_tokens.h
@@ -163,8 +163,71 @@
  *
  * %SKL_TKN_U32_DMA_BUF_SIZE:	DMA buffer size in millisec
  *
+ * %SKL_TKN_U32_PIPE_DIR:       Specifies pipe direction. Can be
+ *                              playback/capture.
+ *
+ * %SKL_TKN_U32_NUM_CONFIGS:    Number of pipe configs
+ *
+ * %SKL_TKN_U32_PATH_MEM_PGS:   Size of memory (in pages) required for pipeline
+ *                              and its data
+ *
+ * %SKL_TKN_U32_PIPE_CONFIG_ID: Config id for the modules in the pipe
+ *                              and PCM params supported by that pipe
+ *                              config. This is used as index to fill
+ *                              up the pipe config and module config
+ *                              structure.
+ *
+ * %SKL_TKN_U32_CFG_FREQ:
+ * %SKL_TKN_U8_CFG_CHAN:
+ * %SKL_TKN_U8_CFG_BPS:         PCM params (freq, channels, bits per sample)
+ *                              supported for each of the pipe configs.
+ *
+ * %SKL_TKN_CFG_MOD_RES_ID:     Module's resource index for each of the
+ *                              pipe config
+ *
+ * %SKL_TKN_CFG_MOD_FMT_ID:     Module's interface index for each of the
+ *                              pipe config
+ *
+ * %SKL_TKN_U8_NUM_MOD:         Number of modules in the manifest
+ *
+ * %SKL_TKN_MM_U8_MOD_IDX:      Current index of the module in the manifest
+ *
+ * %SKL_TKN_MM_U8_NUM_RES:      Number of resources for the module
+ *
+ * %SKL_TKN_MM_U8_NUM_INTF:     Number of interfaces for the module
+ *
+ * %SKL_TKN_MM_U32_RES_ID:      Resource index for the resource info to
+ *                              be filled into.
+ *                              A module can support multiple resource
+ *                              configuration and is represnted as a
+ *                              resource table. This index is used to
+ *                              fill information into appropriate index.
+ *
+ * %SKL_TKN_MM_U32_CPS:         DSP cycles per second
+ *
+ * %SKL_TKN_MM_U32_DMA_SIZE:    Allocated buffer size for gateway DMA
+ *
+ * %SKL_TKN_MM_U32_CPC:         DSP cycles allocated per frame
+ *
+ * %SKL_TKN_MM_U32_RES_PIN_ID:  Resource pin index in the module
+ *
+ * %SKL_TKN_MM_U32_INTF_PIN_ID: Interface index in the module
+ *
+ * %SKL_TKN_MM_U32_PIN_BUF:     Buffer size of the module pin
+ *
+ * %SKL_TKN_MM_U32_FMT_ID:      Format index for each of the interface/
+ *                              format information to be filled into.
+ *
+ * %SKL_TKN_MM_U32_NUM_IN_FMT:  Number of input formats
+ * %SKL_TKN_MM_U32_NUM_OUT_FMT: Number of output formats
+ *
  * module_id and loadable flags dont have tokens as these values will be
  * read from the DSP FW manifest
+ *
+ * Tokens defined can be used either in the manifest or widget private data.
+ *
+ * SKL_TKN_MM is used as a suffix for all tokens that represent
+ * module data in the manifest.
  */
 enum SKL_TKNS {
 	SKL_TKN_UUID = 1,
@@ -218,7 +281,34 @@ enum SKL_TKNS {
 	SKL_TKL_U32_D0I3_CAPS, /* Typo added at v4.10 */
 	SKL_TKN_U32_D0I3_CAPS = SKL_TKL_U32_D0I3_CAPS,
 	SKL_TKN_U32_DMA_BUF_SIZE,
-	SKL_TKN_MAX = SKL_TKN_U32_DMA_BUF_SIZE,
+
+	SKL_TKN_U32_PIPE_DIRECTION,
+	SKL_TKN_U32_PIPE_CONFIG_ID,
+	SKL_TKN_U32_NUM_CONFIGS,
+	SKL_TKN_U32_PATH_MEM_PGS,
+
+	SKL_TKN_U32_CFG_FREQ,
+	SKL_TKN_U8_CFG_CHAN,
+	SKL_TKN_U8_CFG_BPS,
+	SKL_TKN_CFG_MOD_RES_ID,
+	SKL_TKN_CFG_MOD_FMT_ID,
+	SKL_TKN_U8_NUM_MOD,
+
+	SKL_TKN_MM_U8_MOD_IDX,
+	SKL_TKN_MM_U8_NUM_RES,
+	SKL_TKN_MM_U8_NUM_INTF,
+	SKL_TKN_MM_U32_RES_ID,
+	SKL_TKN_MM_U32_CPS,
+	SKL_TKN_MM_U32_DMA_SIZE,
+	SKL_TKN_MM_U32_CPC,
+	SKL_TKN_MM_U32_RES_PIN_ID,
+	SKL_TKN_MM_U32_INTF_PIN_ID,
+	SKL_TKN_MM_U32_PIN_BUF,
+	SKL_TKN_MM_U32_FMT_ID,
+	SKL_TKN_MM_U32_NUM_IN_FMT,
+	SKL_TKN_MM_U32_NUM_OUT_FMT,
+
+	SKL_TKN_MAX = SKL_TKN_MM_U32_NUM_OUT_FMT,
 };
 
 #endif
-- 
cgit v1.2.3


From 38ee7f2d47565689f35662d488d25e7afc43477d Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Fri, 25 Aug 2017 09:56:45 +0200
Subject: ipv6: sr: add support for encapsulation of L2 frames

This patch implements the L2 frame encapsulation mechanism, referred to
as T.Encaps.L2 in the SRv6 specifications [1].

A new type of SRv6 tunnel mode is added (SEG6_IPTUN_MODE_L2ENCAP). It only
accepts packets with an existing MAC header (i.e., it will not work for
locally generated packets). The resulting packet looks like IPv6 -> SRH ->
Ethernet -> original L3 payload. The next header field of the SRH is set to
NEXTHDR_NONE.

[1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/seg6_iptunnel.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
index b6e5a0a1afd7..b23df9f58354 100644
--- a/include/uapi/linux/seg6_iptunnel.h
+++ b/include/uapi/linux/seg6_iptunnel.h
@@ -33,16 +33,26 @@ struct seg6_iptunnel_encap {
 enum {
 	SEG6_IPTUN_MODE_INLINE,
 	SEG6_IPTUN_MODE_ENCAP,
+	SEG6_IPTUN_MODE_L2ENCAP,
 };
 
 #ifdef __KERNEL__
 
 static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
 {
-	int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP);
-
-	return ((tuninfo->srh->hdrlen + 1) << 3) +
-	       (encap * sizeof(struct ipv6hdr));
+	int head = 0;
+
+	switch (tuninfo->mode) {
+	case SEG6_IPTUN_MODE_INLINE:
+		break;
+	case SEG6_IPTUN_MODE_ENCAP:
+		head = sizeof(struct ipv6hdr);
+		break;
+	case SEG6_IPTUN_MODE_L2ENCAP:
+		return 0;
+	}
+
+	return ((tuninfo->srh->hdrlen + 1) << 3) + head;
 }
 
 #endif
-- 
cgit v1.2.3


From ddc088238cd6988bb4ac3776f403d7ff9d3c7a63 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Wed, 16 Aug 2017 17:13:45 +0200
Subject: md: Runtime support for multiple ppls

Increase PPL area to 1MB and use it as circular buffer to store PPL. The
entry with highest generation number is the latest one. If PPL to be
written is larger then space left in a buffer, rewind the buffer to the
start (don't wrap it).

Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 include/uapi/linux/raid/md_p.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index d500bd224979..b9197976b660 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -324,9 +324,10 @@ struct mdp_superblock_1 {
 #define	MD_FEATURE_RECOVERY_BITMAP	128 /* recovery that is happening
 					     * is guided by bitmap.
 					     */
-#define MD_FEATURE_CLUSTERED		256 /* clustered MD */
+#define	MD_FEATURE_CLUSTERED		256 /* clustered MD */
 #define	MD_FEATURE_JOURNAL		512 /* support write cache */
 #define	MD_FEATURE_PPL			1024 /* support PPL */
+#define	MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
@@ -338,6 +339,7 @@ struct mdp_superblock_1 {
 					|MD_FEATURE_CLUSTERED		\
 					|MD_FEATURE_JOURNAL		\
 					|MD_FEATURE_PPL			\
+					|MD_FEATURE_MULTIPLE_PPLS	\
 					)
 
 struct r5l_payload_header {
-- 
cgit v1.2.3


From 7a14724f54bf9889fcb1a9f1d4aa4e1d2e969d93 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 28 Aug 2017 08:33:20 -0700
Subject: libnvdimm: clean up command definitions

Remove the command payloads that do not have an associated libnvdimm
ioctl. I.e. remove the payloads that would only ever be carried in the
ND_CMD_CALL envelope. This prevents userspace from growing unnecessary
dependencies on this kernel header when userspace already has everything
it needs to craft and send these commands.

Cc: Jerry Hoemann <jerry.hoemann@hpe.com>
Reported-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/uapi/linux/ndctl.h | 37 -------------------------------------
 1 file changed, 37 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 6d3c54264d8e..3f03567631cb 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -145,43 +145,6 @@ struct nd_cmd_clear_error {
 	__u64 cleared;
 } __packed;
 
-struct nd_cmd_trans_spa {
-	__u64 spa;
-	__u32 status;
-	__u8  flags;
-	__u8  _reserved[3];
-	__u64 trans_length;
-	__u32 num_nvdimms;
-	struct nd_nvdimm_device {
-		__u32 nfit_device_handle;
-		__u32 _reserved;
-		__u64 dpa;
-	} __packed devices[0];
-
-} __packed;
-
-struct nd_cmd_ars_err_inj {
-	__u64 err_inj_spa_range_base;
-	__u64 err_inj_spa_range_length;
-	__u8  err_inj_options;
-	__u32 status;
-} __packed;
-
-struct nd_cmd_ars_err_inj_clr {
-	__u64 err_inj_clr_spa_range_base;
-	__u64 err_inj_clr_spa_range_length;
-	__u32 status;
-} __packed;
-
-struct nd_cmd_ars_err_inj_stat {
-	__u32 status;
-	__u32 inj_err_rec_count;
-	struct nd_error_stat_query_record {
-		__u64 err_inj_stat_spa_range_base;
-		__u64 err_inj_stat_spa_range_length;
-	} __packed record[0];
-} __packed;
-
 enum {
 	ND_CMD_IMPLEMENTED = 0,
 
-- 
cgit v1.2.3


From 2cfa0bb25d25aa183ea29f1f9c2bc65f3f2c2264 Mon Sep 17 00:00:00 2001
From: Sinclair Yeh <syeh@vmware.com>
Date: Wed, 5 Jul 2017 01:37:55 -0700
Subject: drm/vmwgfx: Prepare to support fence fd

Make the fields and flags available.

Signed-off-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Deepak Singh Rawat <drawat@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 include/uapi/drm/vmwgfx_drm.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index d9dfde9aa757..0bc784f5e0db 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -297,13 +297,17 @@ union drm_vmw_surface_reference_arg {
  * @version: Allows expanding the execbuf ioctl parameters without breaking
  * backwards compatibility, since user-space will always tell the kernel
  * which version it uses.
- * @flags: Execbuf flags. None currently.
+ * @flags: Execbuf flags.
+ * @imported_fence_fd:  FD for a fence imported from another device
  *
  * Argument to the DRM_VMW_EXECBUF Ioctl.
  */
 
 #define DRM_VMW_EXECBUF_VERSION 2
 
+#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0)
+#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1)
+
 struct drm_vmw_execbuf_arg {
 	__u64 commands;
 	__u32 command_size;
@@ -312,7 +316,7 @@ struct drm_vmw_execbuf_arg {
 	__u32 version;
 	__u32 flags;
 	__u32 context_handle;
-	__u32 pad64;
+	__s32 imported_fence_fd;
 };
 
 /**
@@ -328,6 +332,7 @@ struct drm_vmw_execbuf_arg {
  * @passed_seqno: The highest seqno number processed by the hardware
  * so far. This can be used to mark user-space fence objects as signaled, and
  * to determine whether a fence seqno might be stale.
+ * @fd: FD associated with the fence, -1 if not exported
  * @error: This member should've been set to -EFAULT on submission.
  * The following actions should be take on completion:
  * error == -EFAULT: Fence communication failed. The host is synchronized.
@@ -345,7 +350,7 @@ struct drm_vmw_fence_rep {
 	__u32 mask;
 	__u32 seqno;
 	__u32 passed_seqno;
-	__u32 pad64;
+	__s32 fd;
 	__s32 error;
 };
 
-- 
cgit v1.2.3


From 464bc0fd6273d518aee79fbd37211dd9bc35d863 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 28 Aug 2017 07:10:04 -0700
Subject: bpf: convert sockmap field attach_bpf_fd2 to type

In the initial sockmap API we provided strparser and verdict programs
using a single attach command by extending the attach API with a the
attach_bpf_fd2 field.

However, if we add other programs in the future we will be adding a
field for every new possible type, attach_bpf_fd(3,4,..). This
seems a bit clumsy for an API. So lets push the programs using two
new type fields.

   BPF_SK_SKB_STREAM_PARSER
   BPF_SK_SKB_STREAM_VERDICT

This has the advantage of having a readable name and can easily be
extended in the future.

Updates to samples and sockmap included here also generalize tests
slightly to support upcoming patch for multiple map support.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 843818dff96d..97227be3690c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -136,7 +136,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
 	BPF_CGROUP_SOCK_OPS,
-	BPF_CGROUP_SMAP_INGRESS,
+	BPF_SK_SKB_STREAM_PARSER,
+	BPF_SK_SKB_STREAM_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -224,7 +225,6 @@ union bpf_attr {
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
 		__u32		attach_flags;
-		__u32		attach_bpf_fd2;
 	};
 
 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -580,14 +580,11 @@ union bpf_attr {
  *     @flags: reserved for future use
  *     Return: SK_REDIRECT
  *
- * int bpf_sock_map_update(skops, map, key, flags, map_flags)
+ * int bpf_sock_map_update(skops, map, key, flags)
  *	@skops: pointer to bpf_sock_ops
  *	@map: pointer to sockmap to update
  *	@key: key to insert/update sock in map
  *	@flags: same flags as map update elem
- *	@map_flags: sock map specific flags
- *	   bit 1: Enable strparser
- *	   other bits: reserved
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
-- 
cgit v1.2.3


From 2f857d04601a1bb56958b95a9f180bce0e91e5e6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 28 Aug 2017 07:10:25 -0700
Subject: bpf: sockmap, remove STRPARSER map_flags and add multi-map support

The addition of map_flags BPF_SOCKMAP_STRPARSER flags was to handle a
specific use case where we want to have BPF parse program disabled on
an entry in a sockmap.

However, Alexei found the API a bit cumbersome and I agreed. Lets
remove the STRPARSER flag and support the use case by allowing socks
to be in multiple maps. This allows users to create two maps one with
programs attached and one without. When socks are added to maps they
now inherit any programs attached to the map. This is a nice
generalization and IMO improves the API.

The API rules are less ambiguous and do not need a flag:

  - When a sock is added to a sockmap we have two cases,

     i. The sock map does not have any attached programs so
        we can add sock to map without inheriting bpf programs.
        The sock may exist in 0 or more other maps.

    ii. The sock map has an attached BPF program. To avoid duplicate
        bpf programs we only add the sock entry if it does not have
        an existing strparser/verdict attached, returning -EBUSY if
        a program is already attached. Otherwise attach the program
        and inherit strparser/verdict programs from the sock map.

This allows for socks to be in a multiple maps for redirects and
inherit a BPF program from a single map.

Also this patch simplifies the logic around BPF_{EXIST|NOEXIST|ANY}
flags. In the original patch I tried to be extra clever and only
update map entries when necessary. Now I've decided the complexity
is not worth it. If users constantly update an entry with the same
sock for no reason (i.e. update an entry without actually changing
any parameters on map or sock) we still do an alloc/release. Using
this and allowing multiple entries of a sock to exist in a map the
logic becomes much simpler.

Note: Now that multiple maps are supported the "maps" pointer called
when a socket is closed becomes a list of maps to remove the sock from.
To keep the map up to date when a sock is added to the sockmap we must
add the map/elem in the list. Likewise when it is removed we must
remove it from the list. This results in searching the per psock list
on delete operation. On TCP_CLOSE events we walk the list and remove
the psock from all map/entry locations. I don't see any perf
implications in this because at most I have a psock in two maps. If
a psock were to be in many maps its possibly this might be noticeable
on delete but I can't think of a reason to dup a psock in many maps.
The sk_callback_lock is used to protect read/writes to the list. This
was convenient because in all locations we were taking the lock
anyways just after working on the list. Also the lock is per sock so
in normal cases we shouldn't see any contention.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97227be3690c..08c206a863e1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -143,9 +143,6 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
-#define BPF_SOCKMAP_STRPARSER	(1U << 0)
-
 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
  * to the given target_fd cgroup the descendent cgroup will be able to
  * override effective bpf program that was inherited from this cgroup
-- 
cgit v1.2.3


From 63e8d4394a2d226803f47abd7287dbb6d21bf8e4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Aug 2017 16:58:20 +0300
Subject: serial: pch_uart: Make port type explicit

It used to be a gap in port definitions after PORT_MAX_8250. Since the
new drivers are coming the gap become shorter and shorter until the
commit a2d6a987bfe4 ("serial: 8250: Add new port type for TI DA8xx/66AK2x")
completely removed it.

So, while type here is just a formality, make things a little bit more
explicit for this driver and move port types to UAPI header. Note,
it uses two types for now.

Fixes: fddceb8b5399 ("tty: 8250: Add 64byte UART support for FSL platforms")
Cc: Priyanka Jain <Priyanka.Jain@freescale.com>
Cc: Poonam Aggrwal <poonam.aggrwal@freescale.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 38bea3217ead..502aa23c7e15 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -57,7 +57,6 @@
 #define PORT_RT2880	29	/* Ralink RT2880 internal UART */
 #define PORT_16550A_FSL64 30	/* Freescale 16550 UART with 64 FIFOs */
 #define PORT_DA830	31	/* TI DA8xx/66AK2x */
-#define PORT_MAX_8250	31	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
@@ -77,6 +76,10 @@
 #define PORT_SUNZILOG	38
 #define PORT_SUNSAB	39
 
+/* Intel EG20 */
+#define PORT_PCH_8LINE	44
+#define PORT_PCH_2LINE	45
+
 /* DEC */
 #define PORT_DZ		46
 #define PORT_ZS		47
-- 
cgit v1.2.3


From 3f3dac7e4d815cb7f929c0ed98c3a45a86852e53 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Aug 2017 16:58:21 +0300
Subject: serial: Remove unused port type

PORT_MFD is not in use since commit

	1bd187de5364 ("x86, intel-mid: remove Intel MID specific serial support")

Remove leftover.

Fixes: 1bd187de5364 ("x86, intel-mid: remove Intel MID specific serial support")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 502aa23c7e15..00d335634271 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -209,9 +209,6 @@
 /* MAX310X */
 #define PORT_MAX310X	94
 
-/* High Speed UART for Medfield */
-#define PORT_MFD	95
-
 /* TI OMAP-UART */
 #define PORT_OMAP	96
 
-- 
cgit v1.2.3


From ee1c90cc2cea80638f559c552371ee6893ca9d9e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Aug 2017 16:58:22 +0300
Subject: serial: Fix port type numbering for TI DA8xx

The UAPI has a global list of unique numbers for different port types.
The commit
	a2d6a987bfe4 ("serial: 8250: Add new port type for TI DA8xx/66AK2x")
introduced a new port type and brought the collision with two other port
types.

Reuse 95 for it instead.

Fixes: a2d6a987bfe4 ("serial: 8250: Add new port type for TI DA8xx/66AK2x")
Cc: David Lechner <david@lechnology.com>
Cc: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 00d335634271..dc2d7cb766ab 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -56,7 +56,6 @@
 #define PORT_ALTR_16550_F128 28 /* Altera 16550 UART with 128 FIFOs */
 #define PORT_RT2880	29	/* Ralink RT2880 internal UART */
 #define PORT_16550A_FSL64 30	/* Freescale 16550 UART with 64 FIFOs */
-#define PORT_DA830	31	/* TI DA8xx/66AK2x */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
@@ -209,6 +208,9 @@
 /* MAX310X */
 #define PORT_MAX310X	94
 
+/* TI DA8xx/66AK2x */
+#define PORT_DA830	95
+
 /* TI OMAP-UART */
 #define PORT_OMAP	96
 
-- 
cgit v1.2.3


From 1c16ae65e2502da05310b2ec56b3a1fd3efe6f4d Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 21 Aug 2017 01:17:56 +0800
Subject: serial: 8250: of: Add new port type for MediaTek BTIF controller on
 MT7622/23 SoC

MediaTek BTIF controller is the serial interface similar to UART but it
works only as the digital device which is mainly used to communicate with
the connectivity module called CONNSYS inside the SoC which could be mostly
found on those MediaTek SoCs with Bluetooth feature such as MT7622 and
MT7623 SoCs.

And the controller is made as being compatible with the 8250 register
layout with extra registers such as DMA enablement so it tends to be
integrated with reusing 8250 OF driver. However, DMA mode is not being
supported yet in the current driver.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index dc2d7cb766ab..50d71c436323 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -274,4 +274,7 @@
 /* MPS2 UART */
 #define PORT_MPS2UART	116
 
+/* MediaTek BTIF */
+#define PORT_MTK_BTIF	117
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3


From 5e60a10eaebab93f823295cd7ec3848ba3b6e553 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 25 Aug 2017 10:52:22 -0700
Subject: drm/syncobj: add sync obj wait interface. (v8)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This interface will allow sync object to be used to back
Vulkan fences. This API is pretty much the vulkan fence waiting
API, and I've ported the code from amdgpu.

v2: accept relative timeout, pass remaining time back
to userspace.
v3: return to absolute timeouts.
v4: absolute zero = poll,
    rewrite any/all code to have same operation for arrays
    return -EINVAL for 0 fences.
v4.1: fixup fences allocation check, use u64_to_user_ptr
v5: move to sec/nsec, and use timespec64 for calcs.
v6: use -ETIME and drop the out status flag. (-ETIME
is suggested by ickle, I can feel a shed painting)
v7: talked to Daniel/Arnd, use ktime and ns everywhere.
v8: be more careful in the timeout calculations
    use uint32_t for counter variables so we don't overflow
    graciously handle -ENOINT being returned from dma_fence_wait_timeout

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/drm/drm.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 101593ab10ac..0757c1a41821 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -718,6 +718,17 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +851,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
 
 /**
  * Device specific ioctls should only be in their respective headers
-- 
cgit v1.2.3


From 1fc08218ed2a42c86af5c905fe4c00885376a07e Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Fri, 25 Aug 2017 10:52:25 -0700
Subject: drm/syncobj: Add a CREATE_SIGNALED flag

This requests that the driver create the sync object such that it
already has a signaled dma_fence attached.  Because we don't need
anything in particular (just something signaled), we use a dummy null
fence.  This is useful for Vulkan which has a similar flag that can be
passed to vkCreateFence.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/drm/drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 0757c1a41821..ade7f68d32b5 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -700,6 +700,7 @@ struct drm_prime_handle {
 
 struct drm_syncobj_create {
 	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
 	__u32 flags;
 };
 
-- 
cgit v1.2.3


From e7aca5031a2fb51b6120864d0eff5478c95e6651 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Fri, 25 Aug 2017 10:52:24 -0700
Subject: drm/syncobj: Allow wait for submit and signal behavior (v5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted.  This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted.  This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using posix condition variables or similar.  However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.

v2:
 - Fix a bug in the invalid syncobj error path
 - Unify the wait-all and wait-any cases
v3:
 - Unify the timeout == 0 case a bit with the timeout > 0 case
 - Use wait_event_interruptible_timeout
v4:
 - Use proxy fence
v5:
 - Revert to a combination of v2 and v3
 - Don't use proxy fences
 - Don't use wait_event_interruptible_timeout because it just adds an
   extra layer of callbacks

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/drm/drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index ade7f68d32b5..4c746597225e 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -720,6 +720,7 @@ struct drm_syncobj_handle {
 };
 
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 struct drm_syncobj_wait {
 	__u64 handles;
 	/* absolute timeout */
-- 
cgit v1.2.3


From aa4035d2c7683d2f2fb0ffe8087abd9eabf6d54a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Mon, 28 Aug 2017 14:10:27 -0700
Subject: drm/syncobj: Add a reset ioctl (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This just resets the dma_fence to NULL so it looks like it's never been
signaled.  This will be useful once we add the new wait API for allowing
wait on "submit and signal" behavior.

v2:
 - Take an array of sync objects (Dave Airlie)
v3:
 - Throw -EINVAL if pad != 0

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Christian König <christian.koenig@amd.com> (v1)
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/drm/drm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 4c746597225e..b037fdf9e43b 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -731,6 +731,12 @@ struct drm_syncobj_wait {
 	__u32 pad;
 };
 
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -854,6 +860,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
-- 
cgit v1.2.3


From ffa9443fb3d3eddf0fdf6ac473dc8b5c87f08f15 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Mon, 28 Aug 2017 14:10:28 -0700
Subject: drm/syncobj: Add a signal ioctl (v3)

This IOCTL provides a mechanism for userspace to trigger a sync object
directly.  There are other ways that userspace can trigger a syncobj
such as submitting a dummy batch somewhere or hanging on to a triggered
sync_file and doing an import.  This just provides an easy way to
manually trigger the sync object without weird hacks.

The motivation for this IOCTL is Vulkan fences.  Vulkan lets you create
a fence already in the signaled state so that you can wait on it
immediatly without stalling.  We could also handle this with a new
create flag to ask the driver to create a syncobj that is already
signaled but the IOCTL seemed a bit cleaner and more generic.

v2:
 - Take an array of sync objects (Dave Airlie)
v3:
 - Throw -EINVAL if pad != 0

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/drm/drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b037fdf9e43b..97677cd6964d 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -861,6 +861,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
 #define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
-- 
cgit v1.2.3


From 9382d4e1d3c09fe20fa53eb12b51ef01ad40774f Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Thu, 17 Aug 2017 15:52:06 +0300
Subject: IB/uverbs: Add XRQ creation parameter to UAPI

Add tm_list_size parameter to struct ib_uverbs_create_xsrq.
If SRQ type is tag-matching this field defines maximum size
of tag matching list. Otherwise, it is expected to be zero.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Reviewed-by: Yossi Itigin <yosefe@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_verbs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 63656d2e8705..d5434bbf40c8 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -1024,7 +1024,7 @@ struct ib_uverbs_create_xsrq {
 	__u32 max_wr;
 	__u32 max_sge;
 	__u32 srq_limit;
-	__u32 reserved;
+	__u32 max_num_tags;
 	__u32 xrcd_handle;
 	__u32 cq_handle;
 	__u64 driver_data[0];
-- 
cgit v1.2.3


From 8d50505ada728258fcdce99120b937ce68298c4e Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Thu, 17 Aug 2017 15:52:08 +0300
Subject: IB/uverbs: Expose XRQ capabilities

Make XRQ capabilities available via ibv_query_device() verb.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Reviewed-by: Yossi Itigin <yosefe@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_verbs.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index d5434bbf40c8..9a0b6479fe0c 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -236,6 +236,20 @@ struct ib_uverbs_rss_caps {
 	__u32 reserved;
 };
 
+struct ib_uverbs_tm_caps {
+	/* Max size of rendezvous request message */
+	__u32 max_rndv_hdr_size;
+	/* Max number of entries in tag matching list */
+	__u32 max_num_tags;
+	/* TM flags */
+	__u32 flags;
+	/* Max number of outstanding list operations */
+	__u32 max_ops;
+	/* Max number of SGE in tag matching entry */
+	__u32 max_sge;
+	__u32 reserved;
+};
+
 struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_query_device_resp base;
 	__u32 comp_mask;
@@ -247,6 +261,7 @@ struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_rss_caps rss_caps;
 	__u32  max_wq_type_rq;
 	__u32 raw_packet_caps;
+	struct ib_uverbs_tm_caps xrq_caps;
 };
 
 struct ib_uverbs_query_port {
-- 
cgit v1.2.3


From fc7ce9c74c3ad232b084d80148654f926d01ece7 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Mon, 28 Aug 2017 20:52:49 -0400
Subject: perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR

For understanding how the workload maps to memory channels and hardware
behavior, it's very important to collect address maps with physical
addresses. For example, 3D XPoint access can only be found by filtering
the physical address.

Add a new sample type for physical address.

perf already has a facility to collect data virtual address. This patch
introduces a function to convert the virtual address to physical address.
The function is quite generic and can be extended to any architecture as
long as a virtual address is provided.

 - For kernel direct mapping addresses, virt_to_phys is used to convert
   the virtual addresses to physical address.

 - For user virtual addresses, __get_user_pages_fast is used to walk the
   pages tables for user physical address.

 - This does not work for vmalloc addresses right now. These are not
   resolved, but code to do that could be added.

The new sample type requires collecting the virtual address. The
virtual address will not be output unless SAMPLE_ADDR is applied.

For security, the physical address can only be exposed to root or
privileged user.

Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: mpe@ellerman.id.au
Link: http://lkml.kernel.org/r/1503967969-48278-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2a37ae925d85..140ae638cfd6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
-	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 };
 
 /*
@@ -814,6 +815,7 @@ enum perf_event_type {
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
-- 
cgit v1.2.3


From 2804fd3af6ba5ae5737705b27146455eabe2e2f8 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Mon, 28 Aug 2017 15:03:13 -0400
Subject: if_ether: add forces ife lfb type

This patch adds the forces IFE lfb type according to IEEE registered
ethertypes. See http://standards-oui.ieee.org/ethertype/eth.txt for more
information. Since there exists the IFE subsystem it can be used there.

This patch also use the correct word "ForCES" instead of "FoRCES" which
is a spelling error inside the IEEE ethertype specification.

Signed-off-by: Alexander Aring <aring@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index efeb1190c2ca..f68f6bf4a253 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -104,6 +104,7 @@
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IFE	0xED3E		/* ForCES inter-FE LFB type */
 #define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 #define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is less than this value
-- 
cgit v1.2.3


From 155e6f649757c902901e599c268f8b575ddac1f8 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Mon, 28 Aug 2017 21:43:21 +0200
Subject: ether: add NSH ethertype

The NSH draft says:

   An IEEE EtherType, 0x894F, has been allocated for NSH.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index f68f6bf4a253..61f7ccce5b69 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -99,6 +99,7 @@
 #define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
 #define ETH_P_80221	0x8917		/* IEEE 802.21 Media Independent Handover Protocol */
 #define ETH_P_HSR	0x892F		/* IEC 62439-3 HSRv1	*/
+#define ETH_P_NSH	0x894F		/* Network Service Header */
 #define ETH_P_LOOPBACK	0x9000		/* Ethernet loopback packet, per IEEE 802.3 */
 #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
-- 
cgit v1.2.3


From 31770e34e43d6c8dee129bfee77e56c34e61f0e5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 30 Aug 2017 19:24:58 +0200
Subject: tcp: Revert "tcp: remove header prediction"

This reverts commit 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78.

Eric Dumazet says:
  We found at Google a significant regression caused by
  45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 tcp: remove header prediction

  In typical RPC  (TCP_RR), when a TCP socket receives data, we now call
  tcp_ack() while we used to not call it.

  This touches enough cache lines to cause a slowdown.

so problem does not seem to be HP removal itself but the tcp_ack()
call.  Therefore, it might be possible to remove HP after all, provided
one finds a way to elide tcp_ack for most cases.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index b3f346fb9fe3..758f12b58541 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -184,7 +184,9 @@ enum
 	LINUX_MIB_DELAYEDACKLOST,		/* DelayedACKLost */
 	LINUX_MIB_LISTENOVERFLOWS,		/* ListenOverflows */
 	LINUX_MIB_LISTENDROPS,			/* ListenDrops */
+	LINUX_MIB_TCPHPHITS,			/* TCPHPHits */
 	LINUX_MIB_TCPPUREACKS,			/* TCPPureAcks */
+	LINUX_MIB_TCPHPACKS,			/* TCPHPAcks */
 	LINUX_MIB_TCPRENORECOVERY,		/* TCPRenoRecovery */
 	LINUX_MIB_TCPSACKRECOVERY,		/* TCPSackRecovery */
 	LINUX_MIB_TCPSACKRENEGING,		/* TCPSACKReneging */
-- 
cgit v1.2.3


From 7373ae7e8f0bf2c0718422481da986db5058b005 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Tue, 29 Aug 2017 22:44:16 -0600
Subject: net: ether: Add support for multiplexing and aggregation type

Define the Qualcomm multiplexing and aggregation (MAP) ether type 0x00F9.
This is needed for receiving data in the MAP protocol like RMNET. This is
not an officially registered ID.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 61f7ccce5b69..9037065e23d0 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -140,6 +140,9 @@
 #define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
 #define ETH_P_CAIF	0x00F7		/* ST-Ericsson CAIF protocol	*/
 #define ETH_P_XDSA	0x00F8		/* Multiplexed DSA protocol	*/
+#define ETH_P_MAP	0x00F9		/* Qualcomm multiplexing and
+					 * aggregation protocol
+					 */
 
 /*
  *	This is an Ethernet frame header.
-- 
cgit v1.2.3


From cdf4969c42a6c1a376dd03a9e846cf638d3cd4b1 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Tue, 29 Aug 2017 22:44:17 -0600
Subject: net: arp: Add support for raw IP device

Define the raw IP type. This is needed for raw IP net devices
like rmnet.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_arp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index cf73510b9238..a2a635620600 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -59,6 +59,7 @@
 #define ARPHRD_LAPB	516		/* LAPB				*/
 #define ARPHRD_DDCMP    517		/* Digital's DDCMP protocol     */
 #define ARPHRD_RAWHDLC	518		/* Raw HDLC			*/
+#define ARPHRD_RAWIP    519		/* Raw IP                       */
 
 #define ARPHRD_TUNNEL	768		/* IPIP tunnel			*/
 #define ARPHRD_TUNNEL6	769		/* IP6IP6 tunnel       		*/
-- 
cgit v1.2.3


From e3bfed1df379c18f20feb06427d952b766e2c00f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 25 Aug 2017 19:53:39 +1000
Subject: KVM: PPC: Book3S HV: Report storage key support to userspace

This adds information about storage keys to the struct returned by
the KVM_PPC_GET_SMMU_INFO ioctl.  The new fields replace a pad field,
which was zeroed by previous kernel versions.  Thus userspace that
knows about the new fields will see zeroes when running on an older
kernel, indicating that storage keys are not supported.  The size of
the structure has not changed.

The number of keys is hard-coded for the CPUs supported by HV KVM,
which is just POWER7, POWER8 and POWER9.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 include/uapi/linux/kvm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6cd63c18708a..838887587411 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;	/* # storage keys supported for data */
+	__u16 instr_keys;	/* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
-- 
cgit v1.2.3


From 72f9b089ecd2cc2194d27cbb14fd80a0b1472e89 Mon Sep 17 00:00:00 2001
From: Aditya Sarwade <asarwade@vmware.com>
Date: Tue, 29 Aug 2017 15:51:29 -0700
Subject: RDMA/vmw_pvrdma: Report network header type in WC

We should report the network header type in the work completion so that
the kernel can infer the right RoCE type headers.

Reviewed-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Aditya Sarwade <asarwade@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/vmw_pvrdma-abi.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index c8c1d2d6df4d..c6569b0032ec 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -125,7 +125,8 @@ enum pvrdma_wc_flags {
 	PVRDMA_WC_IP_CSUM_OK		= 1 << 3,
 	PVRDMA_WC_WITH_SMAC		= 1 << 4,
 	PVRDMA_WC_WITH_VLAN		= 1 << 5,
-	PVRDMA_WC_FLAGS_MAX		= PVRDMA_WC_WITH_VLAN,
+	PVRDMA_WC_WITH_NETWORK_HDR_TYPE	= 1 << 6,
+	PVRDMA_WC_FLAGS_MAX		= PVRDMA_WC_WITH_NETWORK_HDR_TYPE,
 };
 
 struct pvrdma_alloc_ucontext_resp {
@@ -283,7 +284,8 @@ struct pvrdma_cqe {
 	__u8 dlid_path_bits;
 	__u8 port_num;
 	__u8 smac[6];
-	__u8 reserved2[7]; /* Pad to next power of 2 (64). */
+	__u8 network_hdr_type;
+	__u8 reserved2[6]; /* Pad to next power of 2 (64). */
 };
 
 #endif /* __VMW_PVRDMA_ABI_H__ */
-- 
cgit v1.2.3


From fac9658cabb98afb68ef1630c558864e6f559c07 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 3 Aug 2017 16:06:57 +0300
Subject: IB/core: Add new ioctl interface

In this ioctl interface, processing the command starts from
properties of the command and fetching the appropriate user objects
before calling the handler.

Parsing and validation is done according to a specifier declared by
the driver's code. In the driver, all supported objects are declared.
These objects are separated to different object namepsaces. Dividing
objects to namespaces is done at initialization by using the higher
bits of the object ids. This initialization can mix objects declared
in different places to one parsing tree using in this ioctl interface.

For each object we list all supported methods. Similarly to objects,
methods are separated to method namespaces too. Namespacing is done
similarly to the objects case. This could be used in order to add
methods to an existing object.

Each method has a specific handler, which could be either a default
handler or a driver specific handler.
Along with the handler, a bunch of attributes are specified as well.
Similarly to objects and method, attributes are namespaced and hashed
by their ids at initialization too. All supported attributes are
subject to automatic fetching and validation. These attributes include
the command, response and the method's related objects' ids.

When these entities (objects, methods and attributes) are used, the
high bits of the entities ids are used in order to calculate the hash
bucket index. Then, these high bits are masked out in order to have a
zero based index. Since we use these high bits for both bucketing and
namespacing, we get a compact representation and O(1) array access.
This is mandatory for efficient dispatching.

Each attribute has a type (PTR_IN, PTR_OUT, IDR and FD) and a length.
Attributes could be validated through some attributes, like:
(*) Minimum size / Exact size
(*) Fops for FD
(*) Object type for IDR

If an IDR/fd attribute is specified, the kernel also states the object
type and the required access (NEW, WRITE, READ or DESTROY).
All uobject/fd management is done automatically by the infrastructure,
meaning - the infrastructure will fail concurrent commands that at
least one of them requires concurrent access (WRITE/DESTROY),
synchronize actions with device removals (dissociate context events)
and take care of reference counting (increase/decrease) for concurrent
actions invocation. The reference counts on the actual kernel objects
shall be handled by the handlers.

 objects
+--------+
|        |
|        |   methods                                                                +--------+
|        |   ns         method      method_spec                           +-----+   |len     |
+--------+  +------+[d]+-------+   +----------------+[d]+------------+    |attr1+-> |type    |
| object +> |method+-> | spec  +-> +  attr_buckets  +-> |default_chain+--> +-----+   |idr_type|
+--------+  +------+   |handler|   |                |   +------------+    |attr2|   |access  |
|        |  |      |   +-------+   +----------------+   |driver chain|    +-----+   +--------+
|        |  |      |                                    +------------+
|        |  +------+
|        |
|        |
|        |
|        |
|        |
|        |
|        |
|        |
|        |
|        |
+--------+

[d] = Hash ids to groups using the high order bits

The right types table is also chosen by using the high bits from
the ids. Currently we have either default or driver specific groups.

Once validation and object fetching (or creation) completed, we call
the handler:
int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile,
               struct uverbs_attr_bundle *ctx);

ctx bundles attributes of different namespaces. Each element there
is an array of attributes which corresponds to one namespaces of
attributes. For example, in the usually used case:

 ctx                               core
+----------------------------+     +------------+
| core:                      +---> | valid      |
+----------------------------+     | cmd_attr   |
| driver:                    |     +------------+
|----------------------------+--+  | valid      |
                                |  | cmd_attr   |
                                |  +------------+
                                |  | valid      |
                                |  | obj_attr   |
                                |  +------------+
                                |
                                |  drivers
                                |  +------------+
                                +> | valid      |
                                   | cmd_attr   |
                                   +------------+
                                   | valid      |
                                   | cmd_attr   |
                                   +------------+
                                   | valid      |
                                   | obj_attr   |
                                   +------------+

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/rdma_user_ioctl.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h
index 9388125ad51b..165a27e969d5 100644
--- a/include/uapi/rdma/rdma_user_ioctl.h
+++ b/include/uapi/rdma/rdma_user_ioctl.h
@@ -43,6 +43,39 @@
 /* Legacy name, for user space application which already use it */
 #define IB_IOCTL_MAGIC		RDMA_IOCTL_MAGIC
 
+#define RDMA_VERBS_IOCTL \
+	_IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
+
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+enum {
+	/* User input */
+	UVERBS_ATTR_F_MANDATORY = 1U << 0,
+	/*
+	 * Valid output bit should be ignored and considered set in
+	 * mandatory fields. This bit is kernel output.
+	 */
+	UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1,
+};
+
+struct ib_uverbs_attr {
+	__u16 attr_id;		/* command specific type attribute */
+	__u16 len;		/* only for pointers */
+	__u16 flags;		/* combination of UVERBS_ATTR_F_XXXX */
+	__u16 reserved;
+	__u64 data;		/* ptr to command, inline data or idr/fd */
+};
+
+struct ib_uverbs_ioctl_hdr {
+	__u16 length;
+	__u16 object_id;
+	__u16 method_id;
+	__u16 num_attrs;
+	__u64 reserved;
+	struct ib_uverbs_attr  attrs[0];
+};
+
 /*
  * General blocks assignments
  * It is closed on purpose do not expose it it user space
-- 
cgit v1.2.3


From 64b19e1323e96c34af7ca90d1954e70890c7a98e Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 3 Aug 2017 16:07:03 +0300
Subject: IB/core: Export ioctl enum types to user-space

Add a new ib_user_ioctl_verbs.h which exports all required ABI
enums and structs to the user-space.
Export the default types to user-space through this file.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_ioctl_verbs.h | 54 +++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 include/uapi/rdma/ib_user_ioctl_verbs.h

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
new file mode 100644
index 000000000000..78a2e5be4d6e
--- /dev/null
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_IOCTL_VERBS_H
+#define IB_USER_IOCTL_VERBS_H
+
+enum uverbs_default_objects {
+	UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */
+	UVERBS_OBJECT_PD,
+	UVERBS_OBJECT_COMP_CHANNEL,
+	UVERBS_OBJECT_CQ,
+	UVERBS_OBJECT_QP,
+	UVERBS_OBJECT_SRQ,
+	UVERBS_OBJECT_AH,
+	UVERBS_OBJECT_MR,
+	UVERBS_OBJECT_MW,
+	UVERBS_OBJECT_FLOW,
+	UVERBS_OBJECT_XRCD,
+	UVERBS_OBJECT_RWQ_IND_TBL,
+	UVERBS_OBJECT_WQ,
+	UVERBS_OBJECT_LAST,
+};
+
+#endif
+
-- 
cgit v1.2.3


From d70724f149b107f8e4062320270d3d8b6713a1bb Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 3 Aug 2017 16:07:04 +0300
Subject: IB/core: Add legacy driver's user-data

In this phase, we don't want to change all the drivers to use
flexible driver's specific attributes. Therefore, we add two default
attributes: UHW_IN and UHW_OUT. These attributes are optional in some
methods and they encode the driver specific command data. We add
a function that extract this data and creates the legacy udata over
it.

Driver's data should start from UVERBS_UDATA_DRIVER_DATA_FLAG. This
turns on the first bit of the namespace, indicating this attribute
belongs to the driver's namespace.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_ioctl_verbs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 78a2e5be4d6e..90f81eeca35b 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -33,6 +33,11 @@
 #ifndef IB_USER_IOCTL_VERBS_H
 #define IB_USER_IOCTL_VERBS_H
 
+#include <rdma/rdma_user_ioctl.h>
+
+#define UVERBS_UDATA_DRIVER_DATA_NS	1
+#define UVERBS_UDATA_DRIVER_DATA_FLAG	(1UL << UVERBS_ID_NS_SHIFT)
+
 enum uverbs_default_objects {
 	UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */
 	UVERBS_OBJECT_PD,
@@ -50,5 +55,10 @@ enum uverbs_default_objects {
 	UVERBS_OBJECT_LAST,
 };
 
+enum {
+	UVERBS_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG,
+	UVERBS_UHW_OUT,
+};
+
 #endif
 
-- 
cgit v1.2.3


From 9ee79fce364216df35ec46e26d20780c3c1644cc Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 3 Aug 2017 16:07:05 +0300
Subject: IB/core: Add completion queue (cq) object actions

Adding CQ ioctl actions:
1. create_cq
2. destroy_cq

This requires adding the following:
1. A specification describing the method
	a. Handler
	b. Attributes specification
		Each attribute is one of the following:
		a. PTR_IN - input data
			    Note: This could be encoded inlined for
				  data < 64bit
		b. PTR_OUT - response data
		c. IDR - idr based object
		d. FD - fd based object
                Blobs attributes (clauses a and b) contain their type,
	        while objects specifications (clauses c and d)
                contains the expected object type (for example, the
                given id should be UVERBS_TYPE_PD) and the required
                access (READ, WRITE, NEW or DESTROY). If a NEW is
                required, the new object's id will be assigned to this
                attribute. All attributes could get UA_FLAGS
                attribute. Currently we support stating that an
		attribute is mandatory or that the specification size
                corresponds to a lower bound (and that this attribute
		could be extended).
		We currently add both default attributes and the two
		generic UHW_IN and UHW_OUT driver specific attributes.
2. Handler
   A handler gets a uverbs_attr_bundle. The handler developer uses
   uverbs_attr_get to fetch an attribute of a given id.
   Each of these attribute groups correspond to the specification
   group defined in the action (clauses 1.b and 1.c respectively).
   The indices of these arrays corresponds to the attribute ids
   declared in the specifications (clause 2).

   The handler is quite simple. It assumes the infrastructure fetched
   all objects and locked, created or destroyed them as required by
   the specification. Pointer (or blob) attributes were validated to
   match their required sizes. After the handler finished, the
   infrastructure commits or rollbacks the objects.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_ioctl_verbs.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 90f81eeca35b..842792eae383 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -60,5 +60,25 @@ enum {
 	UVERBS_UHW_OUT,
 };
 
+enum uverbs_create_cq_cmd_attr_ids {
+	CREATE_CQ_HANDLE,
+	CREATE_CQ_CQE,
+	CREATE_CQ_USER_HANDLE,
+	CREATE_CQ_COMP_CHANNEL,
+	CREATE_CQ_COMP_VECTOR,
+	CREATE_CQ_FLAGS,
+	CREATE_CQ_RESP_CQE,
+};
+
+enum uverbs_destroy_cq_cmd_attr_ids {
+	DESTROY_CQ_HANDLE,
+	DESTROY_CQ_RESP,
+};
+
+enum uverbs_actions_cq_ops {
+	UVERBS_CQ_CREATE,
+	UVERBS_CQ_DESTROY,
+};
+
 #endif
 
-- 
cgit v1.2.3


From 8fc614c0ae5cb5df11d6aa9559e63baacf20a840 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 31 Aug 2017 14:12:39 -0500
Subject: PCI/AER: Reformat AER register definitions

Reformat so comments fit on same line as definition.  No functional change
intended.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index c22d3ebaca20..46632aaee1c0 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -733,23 +733,17 @@
 #define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
 #define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
 #define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
-/* Correctable Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_COR_EN		0x00000001
-/* Non-fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002
-/* Fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004
+#define PCI_ERR_ROOT_CMD_COR_EN		0x00000001 /* Correctable Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002 /* Non-Fatal Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004 /* Fatal Err Reporting Enable */
 #define PCI_ERR_ROOT_STATUS	48
-#define PCI_ERR_ROOT_COR_RCV		0x00000001	/* ERR_COR Received */
-/* Multi ERR_COR Received */
-#define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002
-/* ERR_FATAL/NONFATAL Received */
-#define PCI_ERR_ROOT_UNCOR_RCV		0x00000004
-/* Multi ERR_FATAL/NONFATAL Received */
-#define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008
-#define PCI_ERR_ROOT_FIRST_FATAL	0x00000010	/* First Fatal */
-#define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020	/* Non-Fatal Received */
-#define PCI_ERR_ROOT_FATAL_RCV		0x00000040	/* Fatal Received */
+#define PCI_ERR_ROOT_COR_RCV		0x00000001 /* ERR_COR Received */
+#define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002 /* Multiple ERR_COR */
+#define PCI_ERR_ROOT_UNCOR_RCV		0x00000004 /* ERR_FATAL/NONFATAL */
+#define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008 /* Multiple FATAL/NONFATAL */
+#define PCI_ERR_ROOT_FIRST_FATAL	0x00000010 /* First UNC is Fatal */
+#define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020 /* Non-Fatal Received */
+#define PCI_ERR_ROOT_FATAL_RCV		0x00000040 /* Fatal Received */
 #define PCI_ERR_ROOT_ERR_SRC	52	/* Error Source Identification */
 
 /* Virtual Channel */
-- 
cgit v1.2.3


From 89e4fdecb51cf5535867026274bc97de9480ade5 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 24 Aug 2017 00:03:43 -0700
Subject: loop: add ioctl for changing logical block size

This is a different approach from the first attempt in f2c6df7dbf9a
("loop: support 4k physical blocksize"). Rather than extending
LOOP_{GET,SET}_STATUS, add a separate ioctl just for setting the block
size.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/loop.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec1f4f2..23158dbe2424 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -88,6 +88,7 @@ struct loop_info64 {
 #define LOOP_CHANGE_FD		0x4C06
 #define LOOP_SET_CAPACITY	0x4C07
 #define LOOP_SET_DIRECT_IO	0x4C08
+#define LOOP_SET_BLOCK_SIZE	0x4C09
 
 /* /dev/loop-control interface */
 #define LOOP_CTL_ADD		0x4C80
-- 
cgit v1.2.3


From ddef7ed2b5cbafae692d1d580bb5a07808926a9c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Jul 2017 18:58:37 +0200
Subject: annotate RWF_... flags

[AV: added missing annotations in syscalls.h/compat.h]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/uapi/linux/aio_abi.h | 21 +++++++++++----------
 include/uapi/linux/fs.h      | 28 ++++++++++++++++++++--------
 2 files changed, 31 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index a2d4a8ac94ca..a04adbc70ddf 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -28,6 +28,7 @@
 #define __LINUX__AIO_ABI_H
 
 #include <linux/types.h>
+#include <linux/fs.h>
 #include <asm/byteorder.h>
 
 typedef __kernel_ulong_t aio_context_t;
@@ -62,14 +63,6 @@ struct io_event {
 	__s64		res2;		/* secondary result */
 };
 
-#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
-#define PADDED(x,y)	x, y
-#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
-#define PADDED(x,y)	y, x
-#else
-#error edit for your odd byteorder.
-#endif
-
 /*
  * we always use a 64bit off_t when communicating
  * with userland.  its up to libraries to do the
@@ -79,8 +72,16 @@ struct io_event {
 struct iocb {
 	/* these are internal to the kernel/libc. */
 	__u64	aio_data;	/* data to be returned in event's data */
-	__u32	PADDED(aio_key, aio_rw_flags);
-				/* the kernel sets aio_key to the req # */
+
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+	__u32	aio_key;	/* the kernel sets aio_key to the req # */
+	__kernel_rwf_t aio_rw_flags;	/* RWF_* flags */
+#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
+	__kernel_rwf_t aio_rw_flags;	/* RWF_* flags */
+	__u32	aio_key;	/* the kernel sets aio_key to the req # */
+#else
+#error edit for your odd byteorder.
+#endif
 
 	/* common fields */
 	__u16	aio_lio_opcode;	/* see IOCB_CMD_ above */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index b7495d05e8de..56235dddea7d 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -358,13 +358,25 @@ struct fscrypt_key {
 #define SYNC_FILE_RANGE_WRITE		2
 #define SYNC_FILE_RANGE_WAIT_AFTER	4
 
-/* flags for preadv2/pwritev2: */
-#define RWF_HIPRI			0x00000001 /* high priority request, poll if possible */
-#define RWF_DSYNC			0x00000002 /* per-IO O_DSYNC */
-#define RWF_SYNC			0x00000004 /* per-IO O_SYNC */
-#define RWF_NOWAIT			0x00000008 /* per-IO, return -EAGAIN if operation would block */
-
-#define RWF_SUPPORTED			(RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
-					 RWF_NOWAIT)
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI	((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC	((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC	((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT	((__force __kernel_rwf_t)0x00000008)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT)
 
 #endif /* _UAPI_LINUX_FS_H */
-- 
cgit v1.2.3


From 1797f5b3cf0b3a73c42b89f7a8fd897417373730 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 31 Aug 2017 17:59:12 +0200
Subject: devlink: Add IPv6 header for dpipe

This will be used by the IPv6 host table which will be introduced in the
following patches. The fields in the header are added per-use. This header
is global and can be reused by many drivers.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 6c172548589d..0cbca96c66b9 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -234,9 +234,14 @@ enum devlink_dpipe_field_ipv4_id {
 	DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
 };
 
+enum devlink_dpipe_field_ipv6_id {
+	DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+};
+
 enum devlink_dpipe_header_id {
 	DEVLINK_DPIPE_HEADER_ETHERNET,
 	DEVLINK_DPIPE_HEADER_IPV4,
+	DEVLINK_DPIPE_HEADER_IPV6,
 };
 
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
-- 
cgit v1.2.3


From 482dca939fb7ee35ba20b944b4c2476133dbf0df Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 31 Aug 2017 15:05:44 -0700
Subject: bpf: Add mark and priority to sock options that can be set

Add socket mark and priority to fields that can be set by
ebpf program when a socket is created.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 08c206a863e1..ba848b761cfb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -758,6 +758,8 @@ struct bpf_sock {
 	__u32 family;
 	__u32 type;
 	__u32 protocol;
+	__u32 mark;
+	__u32 priority;
 };
 
 #define XDP_PACKET_HEADROOM 256
-- 
cgit v1.2.3


From abcc61537e3566cae7f1fd225f2dcb82b3595fe3 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Thu, 31 Aug 2017 10:04:24 +0200
Subject: ANDROID: binder: Add BINDER_GET_NODE_DEBUG_INFO ioctl

The BINDER_GET_NODE_DEBUG_INFO ioctl will return debug info on
a node.  Each successive call reusing the previous return value
will return the next node.  The data will be used by
libmemunreachable to mark the pointers with kernel references
as reachable.

Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/android/binder.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 7668b5791c91..84a9a0944e13 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -186,6 +186,19 @@ struct binder_version {
 #define BINDER_CURRENT_PROTOCOL_VERSION 8
 #endif
 
+/*
+ * Use with BINDER_GET_NODE_DEBUG_INFO, driver reads ptr, writes to all fields.
+ * Set ptr to NULL for the first call to get the info for the first node, and
+ * then repeat the call passing the previously returned value to get the next
+ * nodes.  ptr will be 0 when there are no more nodes.
+ */
+struct binder_node_debug_info {
+	binder_uintptr_t ptr;
+	binder_uintptr_t cookie;
+	__u32            has_strong_ref;
+	__u32            has_weak_ref;
+};
+
 #define BINDER_WRITE_READ		_IOWR('b', 1, struct binder_write_read)
 #define BINDER_SET_IDLE_TIMEOUT		_IOW('b', 3, __s64)
 #define BINDER_SET_MAX_THREADS		_IOW('b', 5, __u32)
@@ -193,6 +206,7 @@ struct binder_version {
 #define BINDER_SET_CONTEXT_MGR		_IOW('b', 7, __s32)
 #define BINDER_THREAD_EXIT		_IOW('b', 8, __s32)
 #define BINDER_VERSION			_IOWR('b', 9, struct binder_version)
+#define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
 
 /*
  * NOTE: Two special error codes you should check for when calling
-- 
cgit v1.2.3


From 8db6c34f1dbc8e06aa016a9b829b06902c3e1340 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serge@hallyn.com>
Date: Mon, 8 May 2017 13:11:56 -0500
Subject: Introduce v3 namespaced file capabilities

Root in a non-initial user ns cannot be trusted to write a traditional
security.capability xattr.  If it were allowed to do so, then any
unprivileged user on the host could map his own uid to root in a private
namespace, write the xattr, and execute the file with privilege on the
host.

However supporting file capabilities in a user namespace is very
desirable.  Not doing so means that any programs designed to run with
limited privilege must continue to support other methods of gaining and
dropping privilege.  For instance a program installer must detect
whether file capabilities can be assigned, and assign them if so but set
setuid-root otherwise.  The program in turn must know how to drop
partial capabilities, and do so only if setuid-root.

This patch introduces v3 of the security.capability xattr.  It builds a
vfs_ns_cap_data struct by appending a uid_t rootid to struct
vfs_cap_data.  This is the absolute uid_t (that is, the uid_t in user
namespace which mounted the filesystem, usually init_user_ns) of the
root id in whose namespaces the file capabilities may take effect.

When a task asks to write a v2 security.capability xattr, if it is
privileged with respect to the userns which mounted the filesystem, then
nothing should change.  Otherwise, the kernel will transparently rewrite
the xattr as a v3 with the appropriate rootid.  This is done during the
execution of setxattr() to catch user-space-initiated capability writes.
Subsequently, any task executing the file which has the noted kuid as
its root uid, or which is in a descendent user_ns of such a user_ns,
will run the file with capabilities.

Similarly when asking to read file capabilities, a v3 capability will
be presented as v2 if it applies to the caller's namespace.

If a task writes a v3 security.capability, then it can provide a uid for
the xattr so long as the uid is valid in its own user namespace, and it
is privileged with CAP_SETFCAP over its namespace.  The kernel will
translate that rootid to an absolute uid, and write that to disk.  After
this, a task in the writer's namespace will not be able to use those
capabilities (unless rootid was 0), but a task in a namespace where the
given uid is root will.

Only a single security.capability xattr may exist at a time for a given
file.  A task may overwrite an existing xattr so long as it is
privileged over the inode.  Note this is a departure from previous
semantics, which required privilege to remove a security.capability
xattr.  This check can be re-added if deemed useful.

This allows a simple setxattr to work, allows tar/untar to work, and
allows us to tar in one namespace and untar in another while preserving
the capability, without risking leaking privilege into a parent
namespace.

Example using tar:

 $ cp /bin/sleep sleepx
 $ mkdir b1 b2
 $ lxc-usernsexec -m b:0:100000:1 -m b:1:$(id -u):1 -- chown 0:0 b1
 $ lxc-usernsexec -m b:0:100001:1 -m b:1:$(id -u):1 -- chown 0:0 b2
 $ lxc-usernsexec -m b:0:100000:1000 -- tar --xattrs-include=security.capability --xattrs -cf b1/sleepx.tar sleepx
 $ lxc-usernsexec -m b:0:100001:1000 -- tar --xattrs-include=security.capability --xattrs -C b2 -xf b1/sleepx.tar
 $ lxc-usernsexec -m b:0:100001:1000 -- getcap b2/sleepx
   b2/sleepx = cap_sys_admin+ep
 # /opt/ltp/testcases/bin/getv3xattr b2/sleepx
   v3 xattr, rootid is 100001

A patch to linux-test-project adding a new set of tests for this
functionality is in the nsfscaps branch at github.com/hallyn/ltp

Changelog:
   Nov 02 2016: fix invalid check at refuse_fcap_overwrite()
   Nov 07 2016: convert rootid from and to fs user_ns
   (From ebiederm: mar 28 2017)
     commoncap.c: fix typos - s/v4/v3
     get_vfs_caps_from_disk: clarify the fs_ns root access check
     nsfscaps: change the code split for cap_inode_setxattr()
   Apr 09 2017:
       don't return v3 cap for caps owned by current root.
      return a v2 cap for a true v2 cap in non-init ns
   Apr 18 2017:
      . Change the flow of fscap writing to support s_user_ns writing.
      . Remove refuse_fcap_overwrite().  The value of the previous
        xattr doesn't matter.
   Apr 24 2017:
      . incorporate Eric's incremental diff
      . move cap_convert_nscap to setxattr and simplify its usage
   May 8, 2017:
      . fix leaking dentry refcount in cap_inode_getsecurity

Signed-off-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/uapi/linux/capability.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index 6fe14d001f68..230e05d35191 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -60,9 +60,13 @@ typedef struct __user_cap_data_struct {
 #define VFS_CAP_U32_2           2
 #define XATTR_CAPS_SZ_2         (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))
 
-#define XATTR_CAPS_SZ           XATTR_CAPS_SZ_2
-#define VFS_CAP_U32             VFS_CAP_U32_2
-#define VFS_CAP_REVISION	VFS_CAP_REVISION_2
+#define VFS_CAP_REVISION_3	0x03000000
+#define VFS_CAP_U32_3           2
+#define XATTR_CAPS_SZ_3         (sizeof(__le32)*(2 + 2*VFS_CAP_U32_3))
+
+#define XATTR_CAPS_SZ           XATTR_CAPS_SZ_3
+#define VFS_CAP_U32             VFS_CAP_U32_3
+#define VFS_CAP_REVISION	VFS_CAP_REVISION_3
 
 struct vfs_cap_data {
 	__le32 magic_etc;            /* Little endian */
@@ -72,6 +76,18 @@ struct vfs_cap_data {
 	} data[VFS_CAP_U32];
 };
 
+/*
+ * same as vfs_cap_data but with a rootid at the end
+ */
+struct vfs_ns_cap_data {
+	__le32 magic_etc;
+	struct {
+		__le32 permitted;    /* Little endian */
+		__le32 inheritable;  /* Little endian */
+	} data[VFS_CAP_U32];
+	__le32 rootid;
+};
+
 #ifndef __KERNEL__
 
 /*
-- 
cgit v1.2.3


From d897246df9fc0a5df97a784bf7b072be4a6ae479 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 31 Aug 2017 13:47:35 -0700
Subject: fsmap: fix documentation of FMR_OF_LAST

The FMR_OF_LAST flag is set on the last fsmap record being returned for
the dataset requested, contrary to what the header file says.  Fix the
docs to reflect the behavior of all fsmap implementations.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 include/uapi/linux/fsmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h
index 7e8e5f0bd6d2..e5213c3e38b2 100644
--- a/include/uapi/linux/fsmap.h
+++ b/include/uapi/linux/fsmap.h
@@ -96,7 +96,7 @@ fsmap_advance(
 #define FMR_OF_EXTENT_MAP	0x4	/* segment = extent map */
 #define FMR_OF_SHARED		0x8	/* segment = shared with another file */
 #define FMR_OF_SPECIAL_OWNER	0x10	/* owner is a special value */
-#define FMR_OF_LAST		0x20	/* segment is the last in the FS */
+#define FMR_OF_LAST		0x20	/* segment is the last in the dataset */
 
 /* Each FS gets to define its own special owner codes. */
 #define FMR_OWNER(type, code)	(((__u64)type << 32) | \
-- 
cgit v1.2.3


From c03fa9bcacd9ac04595cc13f34f3445f0a5ecf13 Mon Sep 17 00:00:00 2001
From: Ivan Delalande <colona@arista.com>
Date: Thu, 31 Aug 2017 09:59:39 -0700
Subject: tcp_diag: report TCP MD5 signing keys and addresses

Report TCP MD5 (RFC2385) signing keys, addresses and address prefixes to
processes with CAP_NET_ADMIN requesting INET_DIAG_INFO. Currently it is
not possible to retrieve these from the kernel once they have been
configured on sockets.

Signed-off-by: Ivan Delalande <colona@arista.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h | 1 +
 include/uapi/linux/tcp.h       | 9 +++++++++
 2 files changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 678496897a68..f52ff62bfabe 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -143,6 +143,7 @@ enum {
 	INET_DIAG_MARK,
 	INET_DIAG_BBRINFO,
 	INET_DIAG_CLASS_ID,
+	INET_DIAG_MD5SIG,
 	__INET_DIAG_MAX,
 };
 
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 030e594bab45..15c25eccab2b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -256,4 +256,13 @@ struct tcp_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
 };
 
+/* INET_DIAG_MD5SIG */
+struct tcp_diag_md5sig {
+	__u8	tcpm_family;
+	__u8	tcpm_prefixlen;
+	__u16	tcpm_keylen;
+	__be32	tcpm_addr[4];
+	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];
+};
+
 #endif /* _UAPI_LINUX_TCP_H */
-- 
cgit v1.2.3


From bea74641e3786d51dcf1175527cc1781420961c9 Mon Sep 17 00:00:00 2001
From: Vishwanath Pai <vpai@akamai.com>
Date: Fri, 18 Aug 2017 20:58:59 +0200
Subject: netfilter: xt_hashlimit: add rate match mode

This patch adds a new feature to hashlimit that allows matching on the
current packet/byte rate without rate limiting. This can be enabled
with a new flag --hashlimit-rate-match. The match returns true if the
current rate of packets is above/below the user specified value.

The main difference between the existing algorithm and the new one is
that the existing algorithm rate-limits the flow whereas the new
algorithm does not. Instead it *classifies* the flow based on whether
it is above or below a certain rate. I will demonstrate this with an
example below. Let us assume this rule:

iptables -A INPUT -m hashlimit --hashlimit-above 10/s -j new_chain

If the packet rate is 15/s, the existing algorithm would ACCEPT 10
packets every second and send 5 packets to "new_chain".

But with the new algorithm, as long as the rate of 15/s is sustained,
all packets will continue to match and every packet is sent to new_chain.

This new functionality will let us classify different flows based on
their current rate, so that further decisions can be made on them based on
what the current rate is.

This is how the new algorithm works:
We divide time into intervals of 1 (sec/min/hour) as specified by
the user. We keep track of the number of packets/bytes processed in the
current interval. After each interval we reset the counter to 0.

When we receive a packet for match, we look at the packet rate
during the current interval and the previous interval to make a
decision:

if [ prev_rate < user and cur_rate < user ]
        return Below
else
        return Above

Where cur_rate is the number of packets/bytes seen in the current
interval, prev is the number of packets/bytes seen in the previous
interval and 'user' is the rate specified by the user.

We also provide flexibility to the user for choosing the time
interval using the option --hashilmit-interval. For example the user can
keep a low rate like x/hour but still keep the interval as small as 1
second.

To preserve backwards compatibility we have to add this feature in a new
revision, so I've created revision 3 for hashlimit. The two new options
we add are:

--hashlimit-rate-match
--hashlimit-rate-interval

I have updated the help text to add these new options. Also added a few
tests for the new options.

Suggested-by: Igor Lubashev <ilubashe@akamai.com>
Reviewed-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_hashlimit.h | 36 ++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index 79da349f1060..aa98573248b1 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -19,12 +19,13 @@
 struct xt_hashlimit_htable;
 
 enum {
-	XT_HASHLIMIT_HASH_DIP = 1 << 0,
-	XT_HASHLIMIT_HASH_DPT = 1 << 1,
-	XT_HASHLIMIT_HASH_SIP = 1 << 2,
-	XT_HASHLIMIT_HASH_SPT = 1 << 3,
-	XT_HASHLIMIT_INVERT   = 1 << 4,
-	XT_HASHLIMIT_BYTES    = 1 << 5,
+	XT_HASHLIMIT_HASH_DIP		= 1 << 0,
+	XT_HASHLIMIT_HASH_DPT		= 1 << 1,
+	XT_HASHLIMIT_HASH_SIP		= 1 << 2,
+	XT_HASHLIMIT_HASH_SPT		= 1 << 3,
+	XT_HASHLIMIT_INVERT		= 1 << 4,
+	XT_HASHLIMIT_BYTES		= 1 << 5,
+	XT_HASHLIMIT_RATE_MATCH		= 1 << 6,
 };
 
 struct hashlimit_cfg {
@@ -79,6 +80,21 @@ struct hashlimit_cfg2 {
 	__u8 srcmask, dstmask;
 };
 
+struct hashlimit_cfg3 {
+	__u64 avg;		/* Average secs between packets * scale */
+	__u64 burst;		/* Period multiplier for upper limit. */
+	__u32 mode;		/* bitmask of XT_HASHLIMIT_HASH_* */
+
+	/* user specified */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;		/* when do entries expire? */
+
+	__u32 interval;
+	__u8 srcmask, dstmask;
+};
+
 struct xt_hashlimit_mtinfo1 {
 	char name[IFNAMSIZ];
 	struct hashlimit_cfg1 cfg;
@@ -95,4 +111,12 @@ struct xt_hashlimit_mtinfo2 {
 	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
 };
 
+struct xt_hashlimit_mtinfo3 {
+	char name[NAME_MAX];
+	struct hashlimit_cfg3 cfg;
+
+	/* Used internally by the kernel */
+	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_HASHLIMIT_H */
-- 
cgit v1.2.3


From a691205571723cb0544110ca91653ac4b0eb5b17 Mon Sep 17 00:00:00 2001
From: "Pablo M. Bermudo Garay" <pablombg@gmail.com>
Date: Wed, 23 Aug 2017 22:41:25 +0200
Subject: netfilter: nft_limit: add stateful object type

Register a new limit stateful object type into the stateful object
infrastructure.

Signed-off-by: Pablo M. Bermudo Garay <pablombg@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b49da72efa68..871afa4871bf 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1282,7 +1282,8 @@ enum nft_ct_helper_attributes {
 #define NFT_OBJECT_COUNTER	1
 #define NFT_OBJECT_QUOTA	2
 #define NFT_OBJECT_CT_HELPER	3
-#define __NFT_OBJECT_MAX	4
+#define NFT_OBJECT_LIMIT	4
+#define __NFT_OBJECT_MAX	5
 #define NFT_OBJECT_MAX		(__NFT_OBJECT_MAX - 1)
 
 /**
-- 
cgit v1.2.3


From 2335ba704f32b855651d0cd15dd9b271ec565fb6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 3 Sep 2017 23:55:59 +0200
Subject: netlink: add NLM_F_NONREC flag for deletion requests

In the last NFWS in Faro, Portugal, we discussed that netlink is lacking
the semantics to request non recursive deletions, ie. do not delete an
object iff it has child objects that hang from this parent object that
the user requests to be deleted.

We need this new flag to solve a problem for the iptables-compat
backward compatibility utility, that runs iptables commands using the
existing nf_tables netlink interface. Specifically, custom chains in
iptables cannot be deleted if there are rules in it, however, nf_tables
allows to remove any chain that is populated with content. To sort out
this asymmetry, iptables-compat userspace sets this new NLM_F_NONREC
flag to obtain the same semantics that iptables provides.

This new flag should only be used for deletion requests. Note this new
flag value overlaps with the existing:

* NLM_F_ROOT for get requests.
* NLM_F_REPLACE for new requests.

However, those flags should not ever be used in deletion requests.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netlink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f4fc9c9e123d..e8af60a7c56d 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -69,6 +69,9 @@ struct nlmsghdr {
 #define NLM_F_CREATE	0x400	/* Create, if it does not exist	*/
 #define NLM_F_APPEND	0x800	/* Add to end of list		*/
 
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC	0x100	/* Do not delete recursively	*/
+
 /* Flags for ACK message */
 #define NLM_F_CAPPED	0x100	/* request was capped */
 #define NLM_F_ACK_TLVS	0x200	/* extended ACK TVLs were included */
-- 
cgit v1.2.3


From e652f694598273c5d749687032d1534a30e6a3a5 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 6 Sep 2017 16:23:25 -0700
Subject: mm: hugetlb: define system call hugetlb size encodings in single file

Patch series "Consolidate system call hugetlb page size encodings".

These patches are the result of discussions in
https://lkml.org/lkml/2017/3/8/548.  The following changes are made in the
patch set:

1) Put all the log2 encoded huge page size definitions in a common
   header file.  The idea is have a set of definitions that can be use as
   the basis for system call specific definitions such as MAP_HUGE_* and
   SHM_HUGE_*.

2) Remove MAP_HUGE_* definitions in arch specific files.  All these
   definitions are the same.  Consolidate all definitions in the primary
   user header file (uapi/linux/mman.h).

3) Remove SHM_HUGE_* definitions intended for user space from kernel
   header file, and add to user (uapi/linux/shm.h) header file.  Add
   definitions for all known huge page size encodings as in mmap.

This patch (of 3):

If hugetlb pages are requested in mmap or shmget system calls, a huge
page size other than default can be requested.  This is accomplished by
encoding the log2 of the huge page size in the upper bits of the flag
argument.  asm-generic and arch specific headers all define the same
values for these encodings.

Put common definitions in a single header file.  The primary uapi header
files for mmap and shm will use these definitions as a basis for
definitions specific to those system calls.

Link: http://lkml.kernel.org/r/1501527386-10736-2-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/asm-generic/hugetlb_encode.h | 34 +++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 include/uapi/asm-generic/hugetlb_encode.h

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h
new file mode 100644
index 000000000000..e4732d3c2998
--- /dev/null
+++ b/include/uapi/asm-generic/hugetlb_encode.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size.  If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments.  The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments.  System call specific header files
+ * that use this encoding should include this file.  They can then
+ * provide definitions based on these with their own specific prefix.
+ * for example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT	26
+#define HUGETLB_FLAG_ENCODE_MASK	0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB		(21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB		(23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB	(24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB	(28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB		(30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB		(31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB	(34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
-- 
cgit v1.2.3


From aafd4562dfee81a40ba21b5ea3cf5e06664bc7f6 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 6 Sep 2017 16:23:29 -0700
Subject: mm: arch: consolidate mmap hugetlb size encodings

A non-default huge page size can be encoded in the flags argument of the
mmap system call.  The definitions for these encodings are in arch
specific header files.  However, all architectures use the same values.

Consolidate all the definitions in the primary user header file
(uapi/linux/mman.h).  Include definitions for all known huge page sizes.
Use the generic encoding definitions in hugetlb_encode.h as the basis
for these definitions.

Link: http://lkml.kernel.org/r/1501527386-10736-3-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/asm-generic/mman-common.h | 11 -----------
 include/uapi/linux/mman.h              | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 8c27db0c5c08..d248f3c335b5 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -61,17 +61,6 @@
 /* compatibility flags */
 #define MAP_FILE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index ade4acd3a90c..a937480d7cd3 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -2,6 +2,7 @@
 #define _UAPI_LINUX_MMAN_H
 
 #include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
 
 #define MREMAP_MAYMOVE	1
 #define MREMAP_FIXED	2
@@ -10,4 +11,25 @@
 #define OVERCOMMIT_ALWAYS		1
 #define OVERCOMMIT_NEVER		2
 
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
+
 #endif /* _UAPI_LINUX_MMAN_H */
-- 
cgit v1.2.3


From 4da243ac1cf6aeb30b7c555d56208982d66d6d33 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 6 Sep 2017 16:23:33 -0700
Subject: mm: shm: use new hugetlb size encoding definitions

Use the common definitions from hugetlb_encode.h header file for
encoding hugetlb size definitions in shmget system call flags.

In addition, move these definitions from the internal (kernel) to user
(uapi) header file.

Link: http://lkml.kernel.org/r/1501527386-10736-4-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/shm.h | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h
index 1fbf24ea37fd..cf23c873719d 100644
--- a/include/uapi/linux/shm.h
+++ b/include/uapi/linux/shm.h
@@ -3,6 +3,7 @@
 
 #include <linux/ipc.h>
 #include <linux/errno.h>
+#include <asm-generic/hugetlb_encode.h>
 #ifndef __KERNEL__
 #include <unistd.h>
 #endif
@@ -40,11 +41,37 @@ struct shmid_ds {
 /* Include the definition of shmid64_ds and shminfo64 */
 #include <asm/shmbuf.h>
 
-/* permission flag for shmget */
+/*
+ * shmget() shmflg values.
+ */
+/* The bottom nine bits are the same as open(2) mode flags */
 #define SHM_R		0400	/* or S_IRUGO from <linux/stat.h> */
 #define SHM_W		0200	/* or S_IWUGO from <linux/stat.h> */
+/* Bits 9 & 10 are IPC_CREAT and IPC_EXCL */
+#define SHM_HUGETLB	04000	/* segment will use huge TLB pages */
+#define SHM_NORESERVE	010000	/* don't check for reservations */
+
+/*
+ * Huge page size encoding when SHM_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h
+ */
+#define SHM_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define SHM_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define SHM_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define SHM_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define SHM_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define SHM_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define SHM_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define SHM_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define SHM_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define SHM_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define SHM_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define SHM_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
 
-/* mode for attach */
+/*
+ * shmat() shmflg values
+ */
 #define	SHM_RDONLY	010000	/* read-only access */
 #define	SHM_RND		020000	/* round attach address to SHMLBA boundary */
 #define	SHM_REMAP	040000	/* take-over region on attach */
-- 
cgit v1.2.3


From 2d6d6f5a09a96cc1fec7ed992b825e05f64cb50e Mon Sep 17 00:00:00 2001
From: Prakash Sangappa <prakash.sangappa@oracle.com>
Date: Wed, 6 Sep 2017 16:23:39 -0700
Subject: mm: userfaultfd: add feature to request for a signal delivery

In some cases, userfaultfd mechanism should just deliver a SIGBUS signal
to the faulting process, instead of the page-fault event.  Dealing with
page-fault event using a monitor thread can be an overhead in these
cases.  For example applications like the database could use the
signaling mechanism for robustness purpose.

Database uses hugetlbfs for performance reason.  Files on hugetlbfs
filesystem are created and huge pages allocated using fallocate() API.
Pages are deallocated/freed using fallocate() hole punching support.
These files are mmapped and accessed by many processes as shared memory.
The database keeps track of which offsets in the hugetlbfs file have
pages allocated.

Any access to mapped address over holes in the file, which can occur due
to bugs in the application, is considered invalid and expect the process
to simply receive a SIGBUS.  However, currently when a hole in the file
is accessed via the mapped address, kernel/mm attempts to automatically
allocate a page at page fault time, resulting in implicitly filling the
hole in the file.  This may not be the desired behavior for applications
like the database that want to explicitly manage page allocations of
hugetlbfs files.

Using userfaultfd mechanism with this support to get a signal, database
application can prevent pages from being allocated implicitly when
processes access mapped address over holes in the file.

This patch adds UFFD_FEATURE_SIGBUS feature to userfaultfd mechnism to
request for a SIGBUS signal.

See following for previous discussion about the database requirement
leading to this proposal as suggested by Andrea.

http://www.spinics.net/lists/linux-mm/msg129224.html

Link: http://lkml.kernel.org/r/1501552446-748335-2-git-send-email-prakash.sangappa@oracle.com
Signed-off-by: Prakash Sangappa <prakash.sangappa@oracle.com>
Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/userfaultfd.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 3b059530dac9..d39d5db56771 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -23,7 +23,8 @@
 			   UFFD_FEATURE_EVENT_REMOVE |	\
 			   UFFD_FEATURE_EVENT_UNMAP |		\
 			   UFFD_FEATURE_MISSING_HUGETLBFS |	\
-			   UFFD_FEATURE_MISSING_SHMEM)
+			   UFFD_FEATURE_MISSING_SHMEM |		\
+			   UFFD_FEATURE_SIGBUS)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -153,6 +154,12 @@ struct uffdio_api {
 	 * UFFD_FEATURE_MISSING_SHMEM works the same as
 	 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
 	 * (i.e. tmpfs and other shmem based APIs).
+	 *
+	 * UFFD_FEATURE_SIGBUS feature means no page-fault
+	 * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+	 * a SIGBUS signal will be sent to the faulting process.
+	 * The application process can enable this behavior by adding
+	 * it to uffdio_api.features.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -161,6 +168,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_MISSING_HUGETLBFS		(1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM		(1<<5)
 #define UFFD_FEATURE_EVENT_UNMAP		(1<<6)
+#define UFFD_FEATURE_SIGBUS			(1<<7)
 	__u64 features;
 
 	__u64 ioctls;
-- 
cgit v1.2.3


From 9d4ac934829ac58c5109c49e6dfe677300e5e652 Mon Sep 17 00:00:00 2001
From: Alexey Perevalov <a.perevalov@samsung.com>
Date: Wed, 6 Sep 2017 16:23:56 -0700
Subject: userfaultfd: provide pid in userfault msg

It could be useful for calculating downtime during postcopy live
migration per vCPU.  Side observer or application itself will be
informed about proper task's sleep during userfaultfd processing.

Process's thread id is being provided when user requeste it by setting
UFFD_FEATURE_THREAD_ID bit into uffdio_api.features.

Link: http://lkml.kernel.org/r/20170802165145.22628-6-aarcange@redhat.com
Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/userfaultfd.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index d39d5db56771..2b24c28d99a7 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -24,7 +24,8 @@
 			   UFFD_FEATURE_EVENT_UNMAP |		\
 			   UFFD_FEATURE_MISSING_HUGETLBFS |	\
 			   UFFD_FEATURE_MISSING_SHMEM |		\
-			   UFFD_FEATURE_SIGBUS)
+			   UFFD_FEATURE_SIGBUS |		\
+			   UFFD_FEATURE_THREAD_ID)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -79,6 +80,7 @@ struct uffd_msg {
 		struct {
 			__u64	flags;
 			__u64	address;
+			__u32   ptid;
 		} pagefault;
 
 		struct {
@@ -158,8 +160,9 @@ struct uffdio_api {
 	 * UFFD_FEATURE_SIGBUS feature means no page-fault
 	 * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
 	 * a SIGBUS signal will be sent to the faulting process.
-	 * The application process can enable this behavior by adding
-	 * it to uffdio_api.features.
+	 *
+	 * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+	 * be returned, if feature is not requested 0 will be returned.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -169,6 +172,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_MISSING_SHMEM		(1<<5)
 #define UFFD_FEATURE_EVENT_UNMAP		(1<<6)
 #define UFFD_FEATURE_SIGBUS			(1<<7)
+#define UFFD_FEATURE_THREAD_ID			(1<<8)
 	__u64 features;
 
 	__u64 ioctls;
-- 
cgit v1.2.3


From a36985d31a65d5c0559fb582719e32eaf0ccec3b Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Wed, 6 Sep 2017 16:23:59 -0700
Subject: userfaultfd: provide pid in userfault msg - add feat union

No ABI change, but this will make it more explicit to software that ptid
is only available if requested by passing UFFD_FEATURE_THREAD_ID to
UFFDIO_API.  The fact it's a union will also self document it shouldn't
be taken for granted there's a tpid there.

Link: http://lkml.kernel.org/r/20170802165145.22628-7-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Alexey Perevalov <a.perevalov@samsung.com>
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/userfaultfd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 2b24c28d99a7..d6d1f65cb3c3 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -80,7 +80,9 @@ struct uffd_msg {
 		struct {
 			__u64	flags;
 			__u64	address;
-			__u32   ptid;
+			union {
+				__u32 ptid;
+			} feat;
 		} pagefault;
 
 		struct {
-- 
cgit v1.2.3


From 749df87bd7bee5a79cef073f5d032ddb2b211de8 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 6 Sep 2017 16:24:16 -0700
Subject: mm/shmem: add hugetlbfs support to memfd_create()

This patch came out of discussions in this e-mail thread:
  http://lkml.kernel.org/r/1499357846-7481-1-git-send-email-mike.kravetz%40oracle.com

The Oracle JVM team is developing a new garbage collection model.  This
new model requires multiple mappings of the same anonymous memory.  One
straight forward way to accomplish this is with memfd_create.  They can
use the returned fd to create multiple mappings of the same memory.

The JVM today has an option to use (static hugetlb) huge pages.  If this
option is specified, they would like to use the same garbage collection
model requiring multiple mappings to the same memory.  Using hugetlbfs,
it is possible to explicitly mount a filesystem and specify file paths
in order to get an fd that can be used for multiple mappings.  However,
this introduces additional system admin work and coordination.

Ideally they would like to get a hugetlbfs fd without requiring explicit
mounting of a filesystem.  Today, mmap and shmget can make use of
hugetlbfs without explicitly mounting a filesystem.  The patch adds this
functionality to memfd_create.

Add a new flag MFD_HUGETLB to memfd_create() that will specify the file
to be created resides in the hugetlbfs filesystem.  This is the generic
hugetlbfs filesystem not associated with any specific mount point.  As
with other system calls that request hugetlbfs backed pages, there is
the ability to encode huge page size in the flag arguments.

hugetlbfs does not support sealing operations, therefore specifying
MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL.

Of course, the memfd_man page would need updating if this type of
functionality moves forward.

Link: http://lkml.kernel.org/r/1502149672-7759-2-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/memfd.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
index 534e364bda92..7f3a722dbd72 100644
--- a/include/uapi/linux/memfd.h
+++ b/include/uapi/linux/memfd.h
@@ -1,8 +1,32 @@
 #ifndef _UAPI_LINUX_MEMFD_H
 #define _UAPI_LINUX_MEMFD_H
 
+#include <asm-generic/hugetlb_encode.h>
+
 /* flags for memfd_create(2) (unsigned int) */
 #define MFD_CLOEXEC		0x0001U
 #define MFD_ALLOW_SEALING	0x0002U
+#define MFD_HUGETLB		0x0004U
+
+/*
+ * Huge page size encoding when MFD_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MFD_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define MFD_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define MFD_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define MFD_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define MFD_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define MFD_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define MFD_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define MFD_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MFD_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MFD_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define MFD_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define MFD_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
 
 #endif /* _UAPI_LINUX_MEMFD_H */
-- 
cgit v1.2.3


From d2cd9ede6e193dd7d88b6d27399e96229a551b19 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 6 Sep 2017 16:25:15 -0700
Subject: mm,fork: introduce MADV_WIPEONFORK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce MADV_WIPEONFORK semantics, which result in a VMA being empty
in the child process after fork.  This differs from MADV_DONTFORK in one
important way.

If a child process accesses memory that was MADV_WIPEONFORK, it will get
zeroes.  The address ranges are still valid, they are just empty.

If a child process accesses memory that was MADV_DONTFORK, it will get a
segmentation fault, since those address ranges are no longer valid in
the child after fork.

Since MADV_DONTFORK also seems to be used to allow very large programs
to fork in systems with strict memory overcommit restrictions, changing
the semantics of MADV_DONTFORK might break existing programs.

MADV_WIPEONFORK only works on private, anonymous VMAs.

The use case is libraries that store or cache information, and want to
know that they need to regenerate it in the child process after fork.

Examples of this would be:
 - systemd/pulseaudio API checks (fail after fork) (replacing a getpid
   check, which is too slow without a PID cache)
 - PKCS#11 API reinitialization check (mandated by specification)
 - glibc's upcoming PRNG (reseed after fork)
 - OpenSSL PRNG (reseed after fork)

The security benefits of a forking server having a re-inialized PRNG in
every child process are pretty obvious.  However, due to libraries
having all kinds of internal state, and programs getting compiled with
many different versions of each library, it is unreasonable to expect
calling programs to re-initialize everything manually after fork.

A further complication is the proliferation of clone flags, programs
bypassing glibc's functions to call clone directly, and programs calling
unshare, causing the glibc pthread_atfork hook to not get called.

It would be better to have the kernel take care of this automatically.

The patch also adds MADV_KEEPONFORK, to undo the effects of a prior
MADV_WIPEONFORK.

This is similar to the OpenBSD minherit syscall with MAP_INHERIT_ZERO:

    https://man.openbsd.org/minherit.2

[akpm@linux-foundation.org: numerically order arch/parisc/include/uapi/asm/mman.h #defines]
Link: http://lkml.kernel.org/r/20170811212829.29186-3-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Florian Weimer <fweimer@redhat.com>
Reported-by: Colm MacCártaigh <colm@allcosts.net>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Cc: <linux-api@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/asm-generic/mman-common.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index d248f3c335b5..203268f9231e 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -58,6 +58,9 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	17		/* Clear the MADV_DONTDUMP flag */
 
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-- 
cgit v1.2.3


From 3dd8f7c3b78b9556582fd64bf5c9986723f9dca1 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Fri, 8 Sep 2017 16:16:30 -0700
Subject: autofs: make dev ioctl version and ismountpoint user accessible

Some of the autofs miscellaneous device ioctls need to be accessable to
user space applications without CAP_SYS_ADMIN to get information about
autofs mounts.

Link: http://lkml.kernel.org/r/150216642517.11652.2338933266137331637.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Colin Walters <walters@redhat.com>
Cc: Ondrej Holy <oholy@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/auto_dev-ioctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h
index 744b3d060968..5558db8e6646 100644
--- a/include/uapi/linux/auto_dev-ioctl.h
+++ b/include/uapi/linux/auto_dev-ioctl.h
@@ -16,7 +16,7 @@
 #define AUTOFS_DEVICE_NAME		"autofs"
 
 #define AUTOFS_DEV_IOCTL_VERSION_MAJOR	1
-#define AUTOFS_DEV_IOCTL_VERSION_MINOR	0
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR	1
 
 #define AUTOFS_DEV_IOCTL_SIZE		sizeof(struct autofs_dev_ioctl)
 
-- 
cgit v1.2.3


From 1f28c5d055032e7e8ee5e48198dca7e125d0eec6 Mon Sep 17 00:00:00 2001
From: Tomohiro Kusumi <tkusumi@tuxera.com>
Date: Fri, 8 Sep 2017 16:16:34 -0700
Subject: autofs: remove unused AUTOFS_IOC_EXPIRE_DIRECT/INDIRECT

These are not used by either kernel or userspace, although
AUTOFS_IOC_EXPIRE_DIRECT once seems to have been used by userspace in
around 2006-2008, which was technically just an alias of the existing
ioctl AUTOFS_IOC_EXPIRE_MULTI.

ioctls for autofs are already complicated enough that they could be
removed unless these are staying here to be able to compile userspace code
of certain period of time from a decade ago.

Edit: raven@themaw.net
Yes, this is indeed very old and anything that still uses must
be updated becuase it will be using broken functionality.
End edit: raven@themaw.net

Link: http://lkml.kernel.org/r/150285067347.4670.11494624644273072003.stgit@pluto.themaw.net
Signed-off-by: Tomohiro Kusumi <tkusumi@tuxera.com>
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/auto_fs4.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 7c6da423d54e..9453e9a07c9d 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -155,8 +155,6 @@ enum {
 };
 
 #define AUTOFS_IOC_EXPIRE_MULTI    _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int)
-#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_EXPIRE_DIRECT   AUTOFS_IOC_EXPIRE_MULTI
 #define AUTOFS_IOC_PROTOSUBVER     _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int)
 #define AUTOFS_IOC_ASKUMOUNT       _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int)
 
-- 
cgit v1.2.3


From a2d818030135c293f878fbb772cf40e7a14c5acc Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 8 Sep 2017 16:17:19 -0700
Subject: drivers/pps: aesthetic tweaks to PPS-related content

Collection of aesthetic adjustments to various PPS-related files,
directories and Documentation, some quite minor just for the sake of
consistency, including:

 * Updated example of pps device tree node (courtesy Rodolfo G.)
 * "PPS-API" -> "PPS API"
 * "pps_source_info_s" -> "pps_source_info"
 * "ktimer driver" -> "pps-ktimer driver"
 * "ppstest /dev/pps0" -> "ppstest /dev/pps1" to match example
 * Add missing PPS-related entries to MAINTAINERS file
 * Other trivialities

Link: http://lkml.kernel.org/r/alpine.LFD.2.20.1708261048220.8106@localhost.localdomain
Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Acked-by: Rodolfo Giometti <giometti@enneenne.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/pps.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pps.h b/include/uapi/linux/pps.h
index c1cb3825a8bc..c29d6b791c08 100644
--- a/include/uapi/linux/pps.h
+++ b/include/uapi/linux/pps.h
@@ -95,8 +95,8 @@ struct pps_kparams {
 #define PPS_CAPTURECLEAR	0x02	/* capture clear events */
 #define PPS_CAPTUREBOTH		0x03	/* capture assert and clear events */
 
-#define PPS_OFFSETASSERT	0x10	/* apply compensation for assert ev. */
-#define PPS_OFFSETCLEAR		0x20	/* apply compensation for clear ev. */
+#define PPS_OFFSETASSERT	0x10	/* apply compensation for assert event */
+#define PPS_OFFSETCLEAR		0x20	/* apply compensation for clear event */
 
 #define PPS_CANWAIT		0x100	/* can we wait for an event? */
 #define PPS_CANPOLL		0x200	/* bit reserved for future use */
-- 
cgit v1.2.3


From 9beb8bedb05c5f3a353dba62b8fa7cbbb9ec685e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 9 Sep 2017 01:40:35 +0200
Subject: bpf: make error reporting in bpf_warn_invalid_xdp_action more clear

Differ between illegal XDP action code and just driver
unsupported one to provide better feedback when we throw
a one-time warning here. Reason is that with 814abfabef3c
("xdp: add bpf_redirect helper function") not all drivers
support the new XDP return code yet and thus they will
fall into their 'default' case when checking for return
codes after program return, which then triggers a
bpf_warn_invalid_xdp_action() stating that the return
code is illegal, but from XDP perspective it's not.

I decided not to place something like a XDP_ACT_MAX define
into uapi i) given we don't have this either for all other
program types, ii) future action codes could have further
encoding there, which would render such define unsuitable
and we wouldn't be able to rip it out again, and iii) we
rarely add new action codes.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ba848b761cfb..43ab5c402f98 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -766,8 +766,8 @@ struct bpf_sock {
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
 	XDP_ABORTED = 0,
-- 
cgit v1.2.3


From 19cab8872692960535aa6d12e3a295ac51d1a648 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Sep 2017 15:52:13 -0700
Subject: net: ethtool: Add back transceiver type

Commit 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
deprecated the ethtool_cmd::transceiver field, which was fine in
premise, except that the PHY library was actually using it to report the
type of transceiver: internal or external.

Use the first word of the reserved field to put this __u8 transceiver
field back in. It is made read-only, and we don't expect the
ETHTOOL_xLINKSETTINGS API to be doing anything with this anyway, so this
is mostly for the legacy path where we do:

ethtool_get_settings()
-> dev->ethtool_ops->get_link_ksettings()
   -> convert_link_ksettings_to_legacy_settings()

to have no information loss compared to the legacy get_settings API.

Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9c041dae8e2c..5bd1b1de4ea0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1753,6 +1753,8 @@ enum ethtool_reset_flags {
  *	%ethtool_link_mode_bit_indices for the link modes, and other
  *	link features that the link partner advertised through
  *	autonegotiation; 0 if unknown or not applicable.  Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ *	reported consistently by PHYLIB.  Read-only.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
  * fixed link mode and are writable if the driver supports multiple
@@ -1804,7 +1806,9 @@ struct ethtool_link_settings {
 	__u8	eth_tp_mdix;
 	__u8	eth_tp_mdix_ctrl;
 	__s8	link_mode_masks_nwords;
-	__u32	reserved[8];
+	__u8	transceiver;
+	__u8	reserved1[3];
+	__u32	reserved[7];
 	__u32	link_mode_masks[0];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
-- 
cgit v1.2.3