From e40fbbf0572c5e41dc87ad79001748ed399ce32d Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Wed, 19 Jan 2022 11:44:40 +0000
Subject: uapi/bpf: Add missing description and returns for helper
 documentation

Both description and returns section will become mandatory
for helpers and syscalls in a later commit to generate man pages.

This commit also adds in the documentation that BPF_PROG_RUN is
an alias for BPF_PROG_TEST_RUN for anyone searching for the
syscall in the generated man pages.

Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220119114442.1452088-1-usama.arif@bytedance.com
---
 include/uapi/linux/bpf.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..a9c96c21330a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *			*ctx_out*, *data_in* and *data_out* must be NULL.
  *			*repeat* must be zero.
  *
+ *		BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
@@ -1775,6 +1777,8 @@ union bpf_attr {
  * 		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ * 	Description
+ * 		Get the current pid and tgid.
  * 	Return
  * 		A 64-bit integer containing the current tgid and pid, and
  * 		created as such:
@@ -1782,6 +1786,8 @@ union bpf_attr {
  * 		*current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ * 	Description
+ * 		Get the current uid and gid.
  * 	Return
  * 		A 64-bit integer containing the current GID and UID, and
  * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2262,8 @@ union bpf_attr {
  * 		The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ * 	Description
+ * 		Get the current task.
  * 	Return
  * 		A pointer to the current task struct.
  *
@@ -2369,6 +2377,8 @@ union bpf_attr {
  * 		indicate that the hash is outdated and to trigger a
  * 		recalculation the next time the kernel tries to access this
  * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * 	Return
+ * 		void.
  *
  * long bpf_get_numa_node_id(void)
  * 	Description
@@ -2466,6 +2476,8 @@ union bpf_attr {
  * 		A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * 	Description
+ * 		Get the owner UID of the socked associated to *skb*.
  * 	Return
  * 		The owner UID of the socket associated to *skb*. If the socket
  * 		is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3252,9 @@ union bpf_attr {
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ * 	Description
+ * 		Get the current cgroup id based on the cgroup within which
+ * 		the current task is running.
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
  * 		on the cgroup within which the current task is running.
-- 
cgit v1.2.3


From b44123b4a3dcad4664d3a0f72c011ffd4c9c4d93 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 16 Dec 2021 02:04:27 +0000
Subject: bpf: Add cgroup helpers bpf_{get,set}_retval to get/set syscall
 return value

The helpers continue to use int for retval because all the hooks
are int-returning rather than long-returning. The return value of
bpf_set_retval is int for future-proofing, in case in the future
there may be errors trying to set the retval.

After the previous patch, if a program rejects a syscall by
returning 0, an -EPERM will be generated no matter if the retval
is already set to -err. This patch change it being forced only if
retval is not -err. This is because we want to support, for
example, invoking bpf_set_retval(-EINVAL) and return 0, and have
the syscall return value be -EINVAL not -EPERM.

For BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY, the prior behavior is
that, if the return value is NET_XMIT_DROP, the packet is silently
dropped. We preserve this behavior for backward compatibility
reasons, so even if an errno is set, the errno does not return to
caller. However, setting a non-err to retval cannot propagate so
this is not allowed and we return a -EFAULT in that case.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/b4013fd5d16bed0b01977c1fafdeae12e1de61fb.1639619851.git.zhuyifei@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a9c96c21330a..fe2272defcd9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5033,6 +5033,22 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *	Description
+ *		Get the syscall's return value that will be returned to userspace.
+ *
+ *		This helper is currently supported by cgroup programs only.
+ *	Return
+ *		The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *	Description
+ *		Set the syscall's return value that will be returned to userspace.
+ *
+ *		This helper is currently supported by cgroup programs only.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5221,6 +5237,8 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(get_retval),			\
+	FN(set_retval),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From c2f2cdbeffda7b153c19e0f3d73149c41026c0db Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 21 Jan 2022 11:09:52 +0100
Subject: bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the
 ebpf program

Introduce BPF_F_XDP_HAS_FRAGS and the related field in bpf_prog_aux
in order to notify the driver the loaded program support xdp frags.

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/db2e8075b7032a356003f407d1b0deb99adaa0ed.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fe2272defcd9..945649c67e03 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1113,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE		(1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS	(1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
-- 
cgit v1.2.3


From 0165cc817075cf701e4289838f1d925ff1911b3e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 21 Jan 2022 11:09:54 +0100
Subject: bpf: introduce bpf_xdp_get_buff_len helper

Introduce bpf_xdp_get_buff_len helper in order to return the xdp buffer
total size (linear and paged area)

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/aac9ac3504c84026cf66a3c71b7c5ae89bc991be.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 945649c67e03..5a28772063f6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5054,6 +5054,12 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *	Description
+ *		Get the total size of a given xdp buff (linear and paged area)
+ *	Return
+ *		The total size of a given xdp buffer.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5244,6 +5250,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(xdp_get_buff_len),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 3f364222d032eea6b245780e845ad213dab28cdd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 21 Jan 2022 11:10:03 +0100
Subject: net: xdp: introduce bpf_xdp_pointer utility routine

Similar to skb_header_pointer, introduce bpf_xdp_pointer utility routine
to return a pointer to a given position in the xdp_buff if the requested
area (offset + len) is contained in a contiguous memory area otherwise it
will be copied in a bounce buffer provided by the caller.
Similar to the tc counterpart, introduce the two following xdp helpers:
- bpf_xdp_load_bytes
- bpf_xdp_store_bytes

Reviewed-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/ab285c1efdd5b7a9d361348b1e7d3ef49f6382b3.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5a28772063f6..16a7574292a5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5060,6 +5060,22 @@ union bpf_attr {
  *		Get the total size of a given xdp buff (linear and paged area)
  *	Return
  *		The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		This helper is provided as an easy way to load data from a
+ *		xdp buffer. It can be used to load *len* bytes from *offset* from
+ *		the frame associated to *xdp_md*, into the buffer pointed by
+ *		*buf*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		Store *len* bytes from buffer *buf* into the frame
+ *		associated to *xdp_md*, at *offset*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5251,6 +5267,8 @@ union bpf_attr {
 	FN(get_retval),			\
 	FN(set_retval),			\
 	FN(xdp_get_buff_len),		\
+	FN(xdp_load_bytes),		\
+	FN(xdp_store_bytes),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 376040e47334c6dc6a939a32197acceb00fe4acf Mon Sep 17 00:00:00 2001
From: Kenny Yu <kennyyu@fb.com>
Date: Mon, 24 Jan 2022 10:54:01 -0800
Subject: bpf: Add bpf_copy_from_user_task() helper

This adds a helper for bpf programs to read the memory of other
tasks.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220124185403.468466-3-kennyyu@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 16a7574292a5..4a2f7041ebae 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5076,6 +5076,16 @@ union bpf_attr {
  *		associated to *xdp_md*, at *offset*.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and stores the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This helper
+ *		can only be used by sleepable programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure. On error
+ *		*dst* buffer is zeroed out.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5269,6 +5279,7 @@ union bpf_attr {
 	FN(xdp_get_buff_len),		\
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
+	FN(copy_from_user_task),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 9690ae60429020f38e4aa2540c306f27eb021bc0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 27 Jan 2022 10:42:59 -0800
Subject: ethtool: add header/data split indication

For applications running on a mix of platforms it's useful
to have a clear indication whether host's NIC supports the
geometry requirements of TCP zero-copy. TCP zero-copy Rx
requires data to be neatly placed into memory pages.
Most NICs can't do that.

This patch is adding GET support only, since the NICs
I work with either always have the feature enabled or
enable it whenever MTU is set to jumbo. In other words
I don't need SET. But adding set should be trivial.
(The only note on SET is that we will likely want
the setting to be "sticky" and use 0 / `unknown`
to reset it back to driver default.)

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool_netlink.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index cca6e474a085..417d4280d7b5 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -318,6 +318,12 @@ enum {
 
 /* RINGS */
 
+enum {
+	ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
+	ETHTOOL_TCP_DATA_SPLIT_DISABLED,
+	ETHTOOL_TCP_DATA_SPLIT_ENABLED,
+};
+
 enum {
 	ETHTOOL_A_RINGS_UNSPEC,
 	ETHTOOL_A_RINGS_HEADER,				/* nest - _A_HEADER_* */
@@ -330,6 +336,7 @@ enum {
 	ETHTOOL_A_RINGS_RX_JUMBO,			/* u32 */
 	ETHTOOL_A_RINGS_TX,				/* u32 */
 	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
+	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,			/* u8 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_RINGS_CNT,
-- 
cgit v1.2.3


From b2a90f4fcb146d0e033203ab646f0fd22cfa947f Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Thu, 13 Jan 2022 11:20:22 +0100
Subject: media: lirc: remove unused lirc features

These features have never been implemented by any lirc driver, including
staging or out of tree drivers. The ioctls for these feaures were removed
in commit d55f09abe24b ("[media] lirc.h: remove several unused ioctls").

So, we can safely remove them.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/lirc.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index 9919f2062b14..a1f9c26ea537 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -72,11 +72,9 @@
 #define LIRC_CAN_SET_REC_CARRIER       (LIRC_CAN_SET_SEND_CARRIER << 16)
 #define LIRC_CAN_SET_REC_DUTY_CYCLE    (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
 
-#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
 #define LIRC_CAN_SET_REC_CARRIER_RANGE    0x80000000
 #define LIRC_CAN_GET_REC_RESOLUTION       0x20000000
 #define LIRC_CAN_SET_REC_TIMEOUT          0x10000000
-#define LIRC_CAN_SET_REC_FILTER           0x08000000
 
 #define LIRC_CAN_MEASURE_CARRIER          0x02000000
 #define LIRC_CAN_USE_WIDEBAND_RECEIVER    0x04000000
@@ -84,8 +82,6 @@
 #define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
 #define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
 
-#define LIRC_CAN_NOTIFY_DECODE            0x01000000
-
 /*** IOCTL commands for lirc driver ***/
 
 #define LIRC_GET_FEATURES              _IOR('i', 0x00000000, __u32)
-- 
cgit v1.2.3


From 68a99f6a0ebfe9101ea79ba5af1c407a5ad4f629 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Sat, 15 Jan 2022 11:19:11 +0100
Subject: media: lirc: report ir receiver overflow

If the driver reports that the hardware had an overflow, report this to
userspace. It would be nice to know when this happens, and not just get
a long space.

This change has been tested with lircd, ir-ctl, and ir-keytable.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/lirc.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index a1f9c26ea537..21c69a6a100d 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -16,14 +16,16 @@
 #define LIRC_MODE2_PULSE     0x01000000
 #define LIRC_MODE2_FREQUENCY 0x02000000
 #define LIRC_MODE2_TIMEOUT   0x03000000
+#define LIRC_MODE2_OVERFLOW  0x04000000
 
 #define LIRC_VALUE_MASK      0x00FFFFFF
 #define LIRC_MODE2_MASK      0xFF000000
 
-#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
-#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
-#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
-#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
+#define LIRC_SPACE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
+#define LIRC_PULSE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
+#define LIRC_FREQUENCY(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
+#define LIRC_TIMEOUT(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
+#define LIRC_OVERFLOW(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_OVERFLOW)
 
 #define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
 #define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
@@ -32,6 +34,7 @@
 #define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
 #define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
 #define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
+#define LIRC_IS_OVERFLOW(val) (LIRC_MODE2(val) == LIRC_MODE2_OVERFLOW)
 
 /* used heavily by lirc userspace */
 #define lirc_t int
-- 
cgit v1.2.3


From e187013abeb4c2a7ec8a4bb978844c7e92a1a6ec Mon Sep 17 00:00:00 2001
From: Akhmat Karakotov <hmukos@yandex-team.ru>
Date: Mon, 31 Jan 2022 16:31:21 +0300
Subject: txhash: Make rethinking txhash behavior configurable via sysctl

Add a per ns sysctl that controls the txhash rethink behavior:
net.core.txrehash. When enabled, the same behavior is retained,
when disabled, rethink is not performed. Sysctl is enabled by default.

Signed-off-by: Akhmat Karakotov <hmukos@yandex-team.ru>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/socket.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index eb0a9a5b6e71..0accd6102ece 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -31,4 +31,7 @@ struct __kernel_sockaddr_storage {
 
 #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
 
+#define SOCK_TXREHASH_DISABLED	0
+#define SOCK_TXREHASH_ENABLED	1
+
 #endif /* _UAPI_LINUX_SOCKET_H */
-- 
cgit v1.2.3


From 26859240e4ee701e0379f08634957adaff67e43a Mon Sep 17 00:00:00 2001
From: Akhmat Karakotov <hmukos@yandex-team.ru>
Date: Mon, 31 Jan 2022 16:31:22 +0300
Subject: txhash: Add socket option to control TX hash rethink behavior

Add the SO_TXREHASH socket option to control hash rethink behavior per socket.
When default mode is set, sockets disable rehash at initialization and use
sysctl option when entering listen state. setsockopt() overrides default
behavior.

Signed-off-by: Akhmat Karakotov <hmukos@yandex-team.ru>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/socket.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 0accd6102ece..51d6bb2f6765 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage {
 
 #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
 
+#define SOCK_TXREHASH_DEFAULT	((u8)-1)
 #define SOCK_TXREHASH_DISABLED	0
 #define SOCK_TXREHASH_ENABLED	1
 
-- 
cgit v1.2.3


From 4421a582718ab81608d8486734c18083b822390d Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 30 Jan 2022 12:55:17 +0100
Subject: bpf: Make dst_port field in struct bpf_sock 16-bit wide

Menglong Dong reports that the documentation for the dst_port field in
struct bpf_sock is inaccurate and confusing. From the BPF program PoV, the
field is a zero-padded 16-bit integer in network byte order. The value
appears to the BPF user as if laid out in memory as so:

  offsetof(struct bpf_sock, dst_port) + 0  <port MSB>
                                      + 8  <port LSB>
                                      +16  0x00
                                      +24  0x00

32-, 16-, and 8-bit wide loads from the field are all allowed, but only if
the offset into the field is 0.

32-bit wide loads from dst_port are especially confusing. The loaded value,
after converting to host byte order with bpf_ntohl(dst_port), contains the
port number in the upper 16-bits.

Remove the confusion by splitting the field into two 16-bit fields. For
backward compatibility, allow 32-bit wide loads from offsetof(struct
bpf_sock, dst_port).

While at it, allow loads 8-bit loads at offset [0] and [1] from dst_port.

Reported-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/r/20220130115518.213259-2-jakub@cloudflare.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4a2f7041ebae..a7f0ddedac1f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5574,7 +5574,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__be16 dst_port;	/* network byte order */
+	__u16 :16;		/* zero padding */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
-- 
cgit v1.2.3


From bfdf4e6208051ed7165b2e92035b4bf11f43eb63 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 27 Jan 2022 10:27:20 -0500
Subject: rseq: Remove broken uapi field layout on 32-bit little endian

The rseq rseq_cs.ptr.{ptr32,padding} uapi endianness handling is
entirely wrong on 32-bit little endian: a preprocessor logic mistake
wrongly uses the big endian field layout on 32-bit little endian
architectures.

Fortunately, those ptr32 accessors were never used within the kernel,
and only meant as a convenience for user-space.

Remove those and replace the whole rseq_cs union by a __u64 type, as
this is the only thing really needed to express the ABI. Document how
32-bit architectures are meant to interact with this field.

Fixes: ec9c82e03a74 ("rseq: uapi: Declare rseq_cs field as union, update includes")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220127152720.25898-1-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 9a402fdb60e9..77ee207623a9 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -105,23 +105,11 @@ struct rseq {
 	 * Read and set by the kernel. Set by user-space with single-copy
 	 * atomicity semantics. This field should only be updated by the
 	 * thread which registered this data structure. Aligned on 64-bit.
+	 *
+	 * 32-bit architectures should update the low order bits of the
+	 * rseq_cs field, leaving the high order bits initialized to 0.
 	 */
-	union {
-		__u64 ptr64;
-#ifdef __LP64__
-		__u64 ptr;
-#else
-		struct {
-#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
-			__u32 padding;		/* Initialized to zero. */
-			__u32 ptr32;
-#else /* LITTLE */
-			__u32 ptr32;
-			__u32 padding;		/* Initialized to zero. */
-#endif /* ENDIAN */
-		} ptr;
-#endif
-	} rseq_cs;
+	__u64 rseq_cs;
 
 	/*
 	 * Restartable sequences flags field.
-- 
cgit v1.2.3


From e4b1eb24ce5a696ef7229f9926ff34d7502f0582 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:53 -0800
Subject: thermal: netlink: Add a new event to notify CPU capabilities change

Add a new netlink event to notify change in CPU capabilities in terms of
performance and efficiency.

Firmware may change CPU capabilities as a result of thermal events in the
system or to account for changes in the TDP (thermal design power) level.

This notification type will allow user space to avoid running workloads
on certain CPUs or proactively adjust power limits to avoid future events.

The netlink message consists of a nested attribute
(THERMAL_GENL_ATTR_CPU_CAPABILITY) with three attributes:

 * THERMAL_GENL_ATTR_CPU_CAPABILITY_ID (type u32):
   -- logical CPU number
 * THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE (type u32):
   -- Scaled performance from 0-1023
 * THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY (type u32):
   -- Scaled efficiency from 0-1023

Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/uapi/linux/thermal.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index 9aa2fedfa309..fc78bf3aead7 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -44,7 +44,10 @@ enum thermal_genl_attr {
 	THERMAL_GENL_ATTR_CDEV_MAX_STATE,
 	THERMAL_GENL_ATTR_CDEV_NAME,
 	THERMAL_GENL_ATTR_GOV_NAME,
-
+	THERMAL_GENL_ATTR_CPU_CAPABILITY,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
 	__THERMAL_GENL_ATTR_MAX,
 };
 #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -71,6 +74,7 @@ enum thermal_genl_event {
 	THERMAL_GENL_EVENT_CDEV_DELETE,		/* Cdev unbound */
 	THERMAL_GENL_EVENT_CDEV_STATE_UPDATE,	/* Cdev state updated */
 	THERMAL_GENL_EVENT_TZ_GOV_CHANGE,	/* Governor policy changed  */
+	THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE,	/* CPU capability changed */
 	__THERMAL_GENL_EVENT_MAX,
 };
 #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
-- 
cgit v1.2.3


From 8b5413647262dda8d8d0e07e14ea1de9ac7cf0b2 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 17 Jan 2022 21:56:13 +0100
Subject: netfilter: nfqueue: enable to get skb->priority

This info could be useful to improve traffic analysis.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_queue.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index aed90c4df0c8..ef7c97f21a15 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -61,6 +61,7 @@ enum nfqnl_attr_type {
 	NFQA_SECCTX,			/* security context string */
 	NFQA_VLAN,			/* nested attribute: packet vlan info */
 	NFQA_L2HDR,			/* full L2 header */
+	NFQA_PRIORITY,			/* skb->priority */
 
 	__NFQA_MAX
 };
-- 
cgit v1.2.3


From be847673cfffce8bb6e9ed6ae186081280c58831 Mon Sep 17 00:00:00 2001
From: Justin Iurman <justin.iurman@uliege.be>
Date: Wed, 2 Feb 2022 15:25:53 +0100
Subject: uapi: ioam: Insertion frequency

Add the insertion frequency uapi for IOAM lwtunnels.

Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ioam6_iptunnel.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h
index 829ffdfcacca..38f6a8fdfd34 100644
--- a/include/uapi/linux/ioam6_iptunnel.h
+++ b/include/uapi/linux/ioam6_iptunnel.h
@@ -41,6 +41,15 @@ enum {
 	/* IOAM Trace Header */
 	IOAM6_IPTUNNEL_TRACE,		/* struct ioam6_trace_hdr */
 
+	/* Insertion frequency:
+	 * "k over n" packets (0 < k <= n)
+	 * [0.0001% ... 100%]
+	 */
+#define IOAM6_IPTUNNEL_FREQ_MIN 1
+#define IOAM6_IPTUNNEL_FREQ_MAX 1000000
+	IOAM6_IPTUNNEL_FREQ_K,		/* u32 */
+	IOAM6_IPTUNNEL_FREQ_N,		/* u32 */
+
 	__IOAM6_IPTUNNEL_MAX,
 };
 
-- 
cgit v1.2.3


From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Date: Tue, 24 Aug 2021 16:13:41 -0400
Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs

Checkpoint-Restore in userspace (CRIU) is a powerful tool that can
snapshot a running process and later restore it on same or a remote
machine but expects the processes that have a device file (e.g. GPU)
associated with them, provide necessary driver support to assist CRIU
and its extensible plugin interface. Thus, In order to support the
Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute
Kernel driver, needs to provide a set of new APIs that provide
necessary VRAM metadata and its contents to a userspace component
(CRIU plugin) that can store it in form of image files.

This introduces some new ioctls which will be used to checkpoint-Restore
any KFD bound user process. KFD only allows ioctl calls from the same
process that opened the KFD file descriptor. Since these ioctls are
expected to be called from a KFD criu plugin which has elevated ptrace
attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with
the file descriptors so modify KFD to allow such calls.

(API redesigned by David Yat Sin)
Suggested-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index af96af174dc4..49429a6c42fc 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args {
 	__u32 anon_fd;	/* from KFD */
 };
 
+/**************************************************************************************************
+ * CRIU IOCTLs (Checkpoint Restore In Userspace)
+ *
+ * When checkpointing a process, the userspace application will perform:
+ * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts
+ *    all the queues.
+ * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges)
+ * 3. UNPAUSE op to un-evict all the queues
+ *
+ * When restoring a process, the CRIU userspace application will perform:
+ *
+ * 1. RESTORE op to restore process contents
+ * 2. RESUME op to start the process
+ *
+ * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User
+ * application needs to perform an UNPAUSE operation after calling PROCESS_INFO.
+ */
+
+enum kfd_criu_op {
+	KFD_CRIU_OP_PROCESS_INFO,
+	KFD_CRIU_OP_CHECKPOINT,
+	KFD_CRIU_OP_UNPAUSE,
+	KFD_CRIU_OP_RESTORE,
+	KFD_CRIU_OP_RESUME,
+};
+
+/**
+ * kfd_ioctl_criu_args - Arguments perform CRIU operation
+ * @devices:		[in/out] User pointer to memory location for devices information.
+ * 			This is an array of type kfd_criu_device_bucket.
+ * @bos:		[in/out] User pointer to memory location for BOs information
+ * 			This is an array of type kfd_criu_bo_bucket.
+ * @priv_data:		[in/out] User pointer to memory location for private data
+ * @priv_data_size:	[in/out] Size of priv_data in bytes
+ * @num_devices:	[in/out] Number of GPUs used by process. Size of @devices array.
+ * @num_bos		[in/out] Number of BOs used by process. Size of @bos array.
+ * @num_objects:	[in/out] Number of objects used by process. Objects are opaque to
+ *				 user application.
+ * @pid:		[in/out] PID of the process being checkpointed
+ * @op			[in] Type of operation (kfd_criu_op)
+ *
+ * Return: 0 on success, -errno on failure
+ */
+struct kfd_ioctl_criu_args {
+	__u64 devices;		/* Used during ops: CHECKPOINT, RESTORE */
+	__u64 bos;		/* Used during ops: CHECKPOINT, RESTORE */
+	__u64 priv_data;	/* Used during ops: CHECKPOINT, RESTORE */
+	__u64 priv_data_size;	/* Used during ops: PROCESS_INFO, RESTORE */
+	__u32 num_devices;	/* Used during ops: PROCESS_INFO, RESTORE */
+	__u32 num_bos;		/* Used during ops: PROCESS_INFO, RESTORE */
+	__u32 num_objects;	/* Used during ops: PROCESS_INFO, RESTORE */
+	__u32 pid;		/* Used during ops: PROCESS_INFO, RESUME */
+	__u32 op;
+};
+
+struct kfd_criu_device_bucket {
+	__u32 user_gpu_id;
+	__u32 actual_gpu_id;
+	__u32 drm_fd;
+	__u32 pad;
+};
+
+struct kfd_criu_bo_bucket {
+	__u64 addr;
+	__u64 size;
+	__u64 offset;
+	__u64 restored_offset;    /* During restore, updated offset for BO */
+	__u32 gpu_id;             /* This is the user_gpu_id */
+	__u32 alloc_flags;
+	__u32 dmabuf_fd;
+	__u32 pad;
+};
+
+/* CRIU IOCTLs - END */
+/**************************************************************************************************/
+
 /* Register offset inside the remapped mmio page
  */
 enum kfd_mmio_remap {
@@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_IOC_SET_XNACK_MODE		\
 		AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
 
+#define AMDKFD_IOC_CRIU_OP			\
+		AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x22
+#define AMDKFD_COMMAND_END		0x23
 
 #endif
-- 
cgit v1.2.3


From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Date: Tue, 18 Jan 2022 01:47:58 -0500
Subject: drm/amdkfd: Bump up KFD API version for CRIU

 - Change KFD minor version to 7 for CRIU

Proposed userspace changes:
https://github.com/RadeonOpenCompute/criu

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 49429a6c42fc..e6a56c146920 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -32,9 +32,10 @@
  * - 1.4 - Indicate new SRAM EDC bit in device properties
  * - 1.5 - Add SVM API
  * - 1.6 - Query clear flags in SVM get_attr API
+ * - 1.7 - Checkpoint Restore (CRIU) API
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 6
+#define KFD_IOCTL_MINOR_VERSION 7
 
 struct kfd_ioctl_get_version_args {
 	__u32 major_version;	/* from KFD */
-- 
cgit v1.2.3


From 63ed1aab3d40aa61aaa66819bdce9377ac7f40fa Mon Sep 17 00:00:00 2001
From: Matt Johnston <matt@codeconstruct.com.au>
Date: Wed, 9 Feb 2022 12:05:57 +0800
Subject: mctp: Add SIOCMCTP{ALLOC,DROP}TAG ioctls for tag control

This change adds a couple of new ioctls for mctp sockets:
SIOCMCTPALLOCTAG and SIOCMCTPDROPTAG.  These ioctls provide facilities
for explicit allocation / release of tags, overriding the automatic
allocate-on-send/release-on-reply and timeout behaviours. This allows
userspace more control over messages that may not fit a simple
request/response model.

In order to indicate a pre-allocated tag to the sendmsg() syscall, we
introduce a new flag to the struct sockaddr_mctp.smctp_tag value:
MCTP_TAG_PREALLOC.

Additional changes from Jeremy Kerr <jk@codeconstruct.com.au>.

Contains a fix that was:
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mctp.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 07b0318716fc..154ab56651f1 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -44,7 +44,25 @@ struct sockaddr_mctp_ext {
 
 #define MCTP_TAG_MASK		0x07
 #define MCTP_TAG_OWNER		0x08
+#define MCTP_TAG_PREALLOC	0x10
 
 #define MCTP_OPT_ADDR_EXT	1
 
+#define SIOCMCTPALLOCTAG	(SIOCPROTOPRIVATE + 0)
+#define SIOCMCTPDROPTAG		(SIOCPROTOPRIVATE + 1)
+
+struct mctp_ioc_tag_ctl {
+	mctp_eid_t	peer_addr;
+
+	/* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will
+	 * populate with the allocated tag value. Returned tag value will
+	 * always have TO and PREALLOC set.
+	 *
+	 * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from
+	 * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set).
+	 */
+	__u8		tag;
+	__u16		flags;
+};
+
 #endif /* __UAPI_MCTP_H */
-- 
cgit v1.2.3


From 9a69e2b385f443f244a7e8b8bcafe5ccfb0866b4 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Wed, 9 Feb 2022 19:43:32 +0100
Subject: bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide

remote_port is another case of a BPF context field documented as a 32-bit
value in network byte order for which the BPF context access converter
generates a load of a zero-padded 16-bit integer in network byte order.

First such case was dst_port in bpf_sock which got addressed in commit
4421a582718a ("bpf: Make dst_port field in struct bpf_sock 16-bit wide").

Loading 4-bytes from the remote_port offset and converting the value with
bpf_ntohl() leads to surprising results, as the expected value is shifted
by 16 bits.

Reduce the confusion by splitting the field in two - a 16-bit field holding
a big-endian integer, and a 16-bit zero-padding anonymous field that
follows it.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220209184333.654927-2-jakub@cloudflare.com
---
 include/uapi/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a7f0ddedac1f..afe3d0d7f5f2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6453,7 +6453,8 @@ struct bpf_sk_lookup {
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
 	__u32 remote_ip4;	/* Network byte order */
 	__u32 remote_ip6[4];	/* Network byte order */
-	__u32 remote_port;	/* Network byte order */
+	__be16 remote_port;	/* Network byte order */
+	__u16 :16;		/* Zero padding */
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
-- 
cgit v1.2.3


From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001
From: Mukul Joshi <mukul.joshi@amd.com>
Date: Fri, 4 Feb 2022 11:58:32 -0500
Subject: drm/amdkfd: Remove unused old debugger implementation

Cleanup the kfd code by removing the unused old debugger
implementation.
The address watch was only ever implemented in the upstream
driver for GFXv7 (Kaveri). The user mode tools runtime using
this API was never open-sourced. Work on the old debugger
prototype that used this API has been discontinued years ago.
Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e6a56c146920..6e4268f5e482 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_IOC_WAIT_EVENTS			\
 		AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
 
-#define AMDKFD_IOC_DBG_REGISTER			\
+#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED	\
 		AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
 
-#define AMDKFD_IOC_DBG_UNREGISTER		\
+#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED	\
 		AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
 
-#define AMDKFD_IOC_DBG_ADDRESS_WATCH		\
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED	\
 		AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
 
-#define AMDKFD_IOC_DBG_WAVE_CONTROL		\
+#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED	\
 		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
 
 #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA	\
-- 
cgit v1.2.3


From 5cad527d5ffa9a1c4731bb9c97d2ee93f8960d50 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Wed, 9 Feb 2022 14:08:38 +0800
Subject: net: drop_monitor: support drop reason

In the commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()")
drop reason is introduced to the tracepoint of kfree_skb. Therefore,
drop_monitor is able to report the drop reason to users by netlink.

The drop reasons are reported as string to users, which is exactly
the same as what we do when reporting it to ftrace.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220209060838.55513-1-imagedong@tencent.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/net_dropmon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 66048cc5d7b3..1bbea8f0681e 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -93,6 +93,7 @@ enum net_dm_attr {
 	NET_DM_ATTR_SW_DROPS,			/* flag */
 	NET_DM_ATTR_HW_DROPS,			/* flag */
 	NET_DM_ATTR_FLOW_ACTION_COOKIE,		/* binary */
+	NET_DM_ATTR_REASON,			/* string */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
-- 
cgit v1.2.3


From 7f5a08c79df35e68f1a43033450c5050f12bc155 Mon Sep 17 00:00:00 2001
From: Beau Belgrave <beaub@linux.microsoft.com>
Date: Tue, 18 Jan 2022 12:43:15 -0800
Subject: user_events: Add minimal support for trace_event into ftrace

Minimal support for interacting with dynamic events, trace_event and
ftrace. Core outline of flow between user process, ioctl and trace_event
APIs.

User mode processes that wish to use trace events to get data into
ftrace, perf, eBPF, etc are limited to uprobes today. The user events
features enables an ABI for user mode processes to create and write to
trace events that are isolated from kernel level trace events. This
enables a faster path for tracing from user mode data as well as opens
managed code to participate in trace events, where stub locations are
dynamic.

User processes often want to trace only when it's useful. To enable this
a set of pages are mapped into the user process space that indicate the
current state of the user events that have been registered. User
processes can check if their event is hooked to a trace/probe, and if it
is, emit the event data out via the write() syscall.

Two new files are introduced into tracefs to accomplish this:
user_events_status - This file is mmap'd into participating user mode
processes to indicate event status.

user_events_data - This file is opened and register/delete ioctl's are
issued to create/open/delete trace events that can be used for tracing.

The typical scenario is on process start to mmap user_events_status. Processes
then register the events they plan to use via the REG ioctl. The ioctl reads
and updates the passed in user_reg struct. The status_index of the struct is
used to know the byte in the status page to check for that event. The
write_index of the struct is used to describe that event when writing out to
the fd that was used for the ioctl call. The data must always include this
index first when writing out data for an event. Data can be written either by
write() or by writev().

For example, in memory:
int index;
char data[];

Psuedo code example of typical usage:
struct user_reg reg;

int page_fd = open("user_events_status", O_RDWR);
char *page_data = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, page_fd, 0);
close(page_fd);

int data_fd = open("user_events_data", O_RDWR);

reg.size = sizeof(reg);
reg.name_args = (__u64)"test";

ioctl(data_fd, DIAG_IOCSREG, &reg);
int status_id = reg.status_index;
int write_id = reg.write_index;

struct iovec io[2];
io[0].iov_base = &write_id;
io[0].iov_len = sizeof(write_id);
io[1].iov_base = payload;
io[1].iov_len = sizeof(payload);

if (page_data[status_id])
	writev(data_fd, io, 2);

User events are also exposed via the dynamic_events tracefs file for
both create and delete. Current status is exposed via the user_events_status
tracefs file.

Simple example to register a user event via dynamic_events:
	echo u:test >> dynamic_events
	cat dynamic_events
	u:test

If an event is hooked to a probe, the probe hooked shows up:
	echo 1 > events/user_events/test/enable
	cat user_events_status
	1:test # Used by ftrace

	Active: 1
	Busy: 1
	Max: 4096

If an event is not hooked to a probe, no probe status shows up:
	echo 0 > events/user_events/test/enable
	cat user_events_status
	1:test

	Active: 1
	Busy: 0
	Max: 4096

Users can describe the trace event format via the following format:
	name[:FLAG1[,FLAG2...] [field1[;field2...]]

Each field has the following format:
	type name

Example for char array with a size of 20 named msg:
	echo 'u:detailed char[20] msg' >> dynamic_events
	cat dynamic_events
	u:detailed char[20] msg

Data offsets are based on the data written out via write() and will be
updated to reflect the correct offset in the trace_event fields. For dynamic
data it is recommended to use the new __rel_loc data type. This type will be
the same as __data_loc, but the offset is relative to this entry. This allows
user_events to not worry about what common fields are being inserted before
the data.

The above format is valid for both the ioctl and the dynamic_events file.

Link: https://lkml.kernel.org/r/20220118204326.2169-2-beaub@linux.microsoft.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/uapi/linux/user_events.h | 116 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 include/uapi/linux/user_events.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h
new file mode 100644
index 000000000000..e570840571e1
--- /dev/null
+++ b/include/uapi/linux/user_events.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2021, Microsoft Corporation.
+ *
+ * Authors:
+ *   Beau Belgrave <beaub@linux.microsoft.com>
+ */
+#ifndef _UAPI_LINUX_USER_EVENTS_H
+#define _UAPI_LINUX_USER_EVENTS_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifdef __KERNEL__
+#include <linux/uio.h>
+#else
+#include <sys/uio.h>
+#endif
+
+#define USER_EVENTS_SYSTEM "user_events"
+#define USER_EVENTS_PREFIX "u:"
+
+/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
+#define EVENT_BIT_FTRACE 0
+#define EVENT_BIT_PERF 1
+#define EVENT_BIT_OTHER 7
+
+#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
+#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
+#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
+
+/* Create dynamic location entry within a 32-bit value */
+#define DYN_LOC(offset, size) ((size) << 16 | (offset))
+
+/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */
+#define FLAG_BPF_ITER (1 << 0)
+
+/*
+ * Describes an event registration and stores the results of the registration.
+ * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum
+ * must set the size and name_args before invocation.
+ */
+struct user_reg {
+
+	/* Input: Size of the user_reg structure being used */
+	__u32 size;
+
+	/* Input: Pointer to string with event name, description and flags */
+	__u64 name_args;
+
+	/* Output: Byte index of the event within the status page */
+	__u32 status_index;
+
+	/* Output: Index of the event to use when writing data */
+	__u32 write_index;
+};
+
+#define DIAG_IOC_MAGIC '*'
+
+/* Requests to register a user_event */
+#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*)
+
+/* Requests to delete a user_event */
+#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*)
+
+/* Data type that was passed to the BPF program */
+enum {
+	/* Data resides in kernel space */
+	USER_BPF_DATA_KERNEL,
+
+	/* Data resides in user space */
+	USER_BPF_DATA_USER,
+
+	/* Data is a pointer to a user_bpf_iter structure */
+	USER_BPF_DATA_ITER,
+};
+
+/*
+ * Describes an iovec iterator that BPF programs can use to access data for
+ * a given user_event write() / writev() call.
+ */
+struct user_bpf_iter {
+
+	/* Offset of the data within the first iovec */
+	__u32 iov_offset;
+
+	/* Number of iovec structures */
+	__u32 nr_segs;
+
+	/* Pointer to iovec structures */
+	const struct iovec *iov;
+};
+
+/* Context that BPF programs receive when attached to a user_event */
+struct user_bpf_context {
+
+	/* Data type being passed (see union below) */
+	__u32 data_type;
+
+	/* Length of the data */
+	__u32 data_len;
+
+	/* Pointer to data, varies by data type */
+	union {
+		/* Kernel data (data_type == USER_BPF_DATA_KERNEL) */
+		void *kdata;
+
+		/* User data (data_type == USER_BPF_DATA_USER) */
+		void *udata;
+
+		/* Direct iovec (data_type == USER_BPF_DATA_ITER) */
+		struct user_bpf_iter *iter;
+	};
+};
+
+#endif /* _UAPI_LINUX_USER_EVENTS_H */
-- 
cgit v1.2.3


From a6a6fe27bab48f0d09a64b051e7bde432fcae081 Mon Sep 17 00:00:00 2001
From: "D. Wythe" <alibuda@linux.alibaba.com>
Date: Thu, 10 Feb 2022 17:11:37 +0800
Subject: net/smc: Dynamic control handshake limitation by socket options

This patch aims to add dynamic control for SMC handshake limitation for
every smc sockets, in production environment, it is possible for the
same applications to handle different service types, and may have
different opinion on SMC handshake limitation.

This patch try socket options to complete it, since we don't have socket
option level for SMC yet, which requires us to implement it at the same
time.

This patch does the following:

- add new socket option level: SOL_SMC.
- add new SMC socket option: SMC_LIMIT_HS.
- provide getter/setter for SMC socket options.

Link: https://lore.kernel.org/all/20f504f961e1a803f85d64229ad84260434203bd.1644323503.git.alibuda@linux.alibaba.com/
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 6c2874fd2c00..343e7450c3a3 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -284,4 +284,8 @@ enum {
 	__SMC_NLA_SEID_TABLE_MAX,
 	SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1
 };
+
+/* SMC socket options */
+#define SMC_LIMIT_HS 1	/* constraint on smc handshake */
+
 #endif /* _UAPI_LINUX_SMC_H */
-- 
cgit v1.2.3


From f9496b7c1b48ce02cd17a3ee88b1e049c689a222 Mon Sep 17 00:00:00 2001
From: "D. Wythe" <alibuda@linux.alibaba.com>
Date: Thu, 10 Feb 2022 17:11:38 +0800
Subject: net/smc: Add global configure for handshake limitation by netlink

Although we can control SMC handshake limitation through socket options,
which means that applications who need it must modify their code. It's
quite troublesome for many existing applications. This patch modifies
the global default value of SMC handshake limitation through netlink,
providing a way to put constraint on handshake without modifies any code
for applications.

Suggested-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 343e7450c3a3..693f549f6966 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -59,6 +59,9 @@ enum {
 	SMC_NETLINK_DUMP_SEID,
 	SMC_NETLINK_ENABLE_SEID,
 	SMC_NETLINK_DISABLE_SEID,
+	SMC_NETLINK_DUMP_HS_LIMITATION,
+	SMC_NETLINK_ENABLE_HS_LIMITATION,
+	SMC_NETLINK_DISABLE_HS_LIMITATION,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -285,6 +288,14 @@ enum {
 	SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1
 };
 
+/* SMC_NETLINK_HS_LIMITATION attributes */
+enum {
+	SMC_NLA_HS_LIMITATION_UNSPEC,
+	SMC_NLA_HS_LIMITATION_ENABLED,	/* u8 */
+	__SMC_NLA_HS_LIMITATION_MAX,
+	SMC_NLA_HS_LIMITATION_MAX = __SMC_NLA_HS_LIMITATION_MAX - 1
+};
+
 /* SMC socket options */
 #define SMC_LIMIT_HS 1	/* constraint on smc handshake */
 
-- 
cgit v1.2.3


From e9e9feebcbc14b174fef862842f8cc9a388e1db3 Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Fri, 11 Feb 2022 19:22:10 +0100
Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL

User space needs a mechanism to perform key checked accesses when
emulating instructions.

The key can be passed as an additional argument.
Having an additional argument is flexible, as user space can
pass the guest PSW's key, in order to make an access the same way the
CPU would, or pass another key if necessary.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 include/uapi/linux/kvm.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5191b57e1562..4566f429db2c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -562,7 +562,10 @@ struct kvm_s390_mem_op {
 	__u32 op;		/* type of operation */
 	__u64 buf;		/* buffer in userspace */
 	union {
-		__u8 ar;	/* the access register number */
+		struct {
+			__u8 ar;	/* the access register number */
+			__u8 key;	/* access key, ignored if flag unset */
+		};
 		__u32 sida_offset; /* offset into the sida */
 		__u8 reserved[32]; /* should be set to 0 */
 	};
@@ -575,6 +578,7 @@ struct kvm_s390_mem_op {
 /* flags for kvm_s390_mem_op->flags */
 #define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
 #define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION	(1ULL << 2)
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
-- 
cgit v1.2.3


From ef11c9463ae006302ce170a401854a48ea0532ca Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Fri, 11 Feb 2022 19:22:11 +0100
Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access

Channel I/O honors storage keys and is performed on absolute memory.
For I/O emulation user space therefore needs to be able to do key
checked accesses.
The vm IOCTL supports read/write accesses, as well as checking
if an access would succeed.
Unlike relying on KVM_S390_GET_SKEYS for key checking would,
the vm IOCTL performs the check in lockstep with the read or write,
by, ultimately, mapping the access to move instructions that
support key protection checking with a supplied key.
Fetch and storage protection override are not applicable to absolute
accesses and so are not applied as they are when using the vcpu memop.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4566f429db2c..4bc7623def87 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -575,6 +575,8 @@ struct kvm_s390_mem_op {
 #define KVM_S390_MEMOP_LOGICAL_WRITE	1
 #define KVM_S390_MEMOP_SIDA_READ	2
 #define KVM_S390_MEMOP_SIDA_WRITE	3
+#define KVM_S390_MEMOP_ABSOLUTE_READ	4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE	5
 /* flags for kvm_s390_mem_op->flags */
 #define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
 #define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
-- 
cgit v1.2.3


From d004079edc166ff19605475211305923c708b4d5 Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Fri, 11 Feb 2022 19:22:13 +0100
Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL

Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that:
* The vcpu MEM_OP IOCTL supports storage key checking.
* The vm MEM_OP IOCTL exists.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4bc7623def87..08756eeea065 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1140,6 +1140,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_GPA_BITS 207
 #define KVM_CAP_XSAVE2 208
 #define KVM_CAP_SYS_ATTRIBUTES 209
+#define KVM_CAP_S390_MEM_OP_EXTENSION 210
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 5e35d0eb472b48ac9c8ef7017753b8a1f765aa01 Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Fri, 11 Feb 2022 19:22:14 +0100
Subject: KVM: s390: Update api documentation for memop ioctl

Document all currently existing operations, flags and explain under
which circumstances they are available. Document the recently
introduced absolute operations and the storage key protection flag,
as well as the existing SIDA operations.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20220211182215.2730017-10-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 include/uapi/linux/kvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 08756eeea065..dbc550bbd9fa 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -567,7 +567,7 @@ struct kvm_s390_mem_op {
 			__u8 key;	/* access key, ignored if flag unset */
 		};
 		__u32 sida_offset; /* offset into the sida */
-		__u8 reserved[32]; /* should be set to 0 */
+		__u8 reserved[32]; /* ignored */
 	};
 };
 /* types for kvm_s390_mem_op->op */
-- 
cgit v1.2.3


From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 31 Jan 2022 16:54:53 +0000
Subject: elf: Introduce the ARM MTE ELF segment type

Memory tags will be dumped in the core file as segments with their own
type. Discussions with the binutils and the generic ABI community
settled on using new definitions in the PT_*PROC space (and to be
documented in the processor-specific ABIs).

Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). Not included in this
patch since there is no upstream support but the CHERI/BSD community
will also reserve:

  #define PT_ARM_MEMTAG_CHERI    (PT_LOPROC + 0x2)
  #define PT_RISCV_MEMTAG_CHERI  (PT_LOPROC + 0x3)

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Luis Machado <luis.machado@linaro.org>
Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/uapi/linux/elf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 61bf4774b8f2..fe8e5b74cb39 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -40,6 +40,9 @@ typedef __s64	Elf64_Sxword;
 
 #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
 
+/* ARM MTE memory tag segment type */
+#define PT_ARM_MEMTAG_MTE	(PT_LOPROC + 0x1)
+
 /*
  * Extended Numbering
  *
-- 
cgit v1.2.3


From 5cd5a8a3e2fb11b1c8a09f062c44c1e228ef987a Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 14 Feb 2022 17:29:53 +0100
Subject: cfg80211: Add data structures to capture EHT capabilities

And advertise EHT capabilities to user space when supported.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Link: https://lore.kernel.org/r/20220214173004.6fb70658529f.I2413a37c8f7d2d6d638038a3d95360a3fce0114d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 195a238a322e..d305a8b8c536 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3766,6 +3766,14 @@ enum nl80211_mpath_info {
  *	given for all 6 GHz band channels
  * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are
  *	advertised on this band/for this iftype (binary)
+ * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC: EHT MAC capabilities as in EHT
+ *	capabilities element
+ * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY: EHT PHY capabilities as in EHT
+ *	capabilities element
+ * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET: EHT supported NSS/MCS as in EHT
+ *	capabilities element
+ * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE: EHT PPE thresholds information as
+ *	defined in EHT capabilities element
  * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
  * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined
  */
@@ -3779,6 +3787,10 @@ enum nl80211_band_iftype_attr {
 	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
 	NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA,
 	NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS,
+	NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC,
+	NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY,
+	NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET,
+	NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE,
 
 	/* keep last */
 	__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From 3743bec6120ae0748cb3fda6ff80a690117ef1f3 Mon Sep 17 00:00:00 2001
From: Jia Ding <quic_jiad@quicinc.com>
Date: Mon, 14 Feb 2022 17:29:54 +0100
Subject: cfg80211: Add support for EHT 320 MHz channel width

Add 320 MHz support in the channel def and center frequency validation
with compatible check.

Signed-off-by: Jia Ding <quic_jiad@quicinc.com>
Co-authored-by: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
Signed-off-by: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
Co-authored-by: Muna Sinada <quic_msinada@quicinc.com>
Signed-off-by: Muna Sinada <quic_msinada@quicinc.com>
Co-authored-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/1640163883-12696-5-git-send-email-quic_vjakkam@quicinc.com
Link: https://lore.kernel.org/r/20220214163009.175289-1-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d305a8b8c536..9e05973f3f56 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4684,6 +4684,8 @@ enum nl80211_key_mode {
  * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel
  * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel
  * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel
+ * @NL80211_CHAN_WIDTH_320: 320 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ *	attribute must be provided as well
  */
 enum nl80211_chan_width {
 	NL80211_CHAN_WIDTH_20_NOHT,
@@ -4699,6 +4701,7 @@ enum nl80211_chan_width {
 	NL80211_CHAN_WIDTH_4,
 	NL80211_CHAN_WIDTH_8,
 	NL80211_CHAN_WIDTH_16,
+	NL80211_CHAN_WIDTH_320,
 };
 
 /**
-- 
cgit v1.2.3


From cfb14110acf87b4db62e07ba08a80429f1749f40 Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Mon, 14 Feb 2022 17:29:55 +0100
Subject: nl80211: add EHT MCS support

Add support for reporting and calculating EHT bitrates.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/1640163883-12696-7-git-send-email-quic_vjakkam@quicinc.com
Link: https://lore.kernel.org/r/20220214163009.175289-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 62 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9e05973f3f56..d0ba70ea5d04 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3392,6 +3392,56 @@ enum nl80211_he_ru_alloc {
 	NL80211_RATE_INFO_HE_RU_ALLOC_2x996,
 };
 
+/**
+ * enum nl80211_eht_gi - EHT guard interval
+ * @NL80211_RATE_INFO_EHT_GI_0_8: 0.8 usec
+ * @NL80211_RATE_INFO_EHT_GI_1_6: 1.6 usec
+ * @NL80211_RATE_INFO_EHT_GI_3_2: 3.2 usec
+ */
+enum nl80211_eht_gi {
+	NL80211_RATE_INFO_EHT_GI_0_8,
+	NL80211_RATE_INFO_EHT_GI_1_6,
+	NL80211_RATE_INFO_EHT_GI_3_2,
+};
+
+/**
+ * enum nl80211_eht_ru_alloc - EHT RU allocation values
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_26: 26-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_52: 52-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_52P26: 52+26-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_106: 106-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_106P26: 106+26 tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_242: 242-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_484: 484-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_484P242: 484+242 tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_996: 996-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484: 996+484 tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242: 996+484+242 tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996: 2x996-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484: 2x996+484 tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996: 3x996-tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484: 3x996+484 tone RU allocation
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC_4x996: 4x996-tone RU allocation
+ */
+enum nl80211_eht_ru_alloc {
+	NL80211_RATE_INFO_EHT_RU_ALLOC_26,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_52,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_52P26,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_106,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_106P26,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_242,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_484,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_484P242,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_996,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_996P484,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_2x996,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_3x996,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484,
+	NL80211_RATE_INFO_EHT_RU_ALLOC_4x996,
+};
+
 /**
  * enum nl80211_rate_info - bitrate information
  *
@@ -3431,6 +3481,13 @@ enum nl80211_he_ru_alloc {
  * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1)
  * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then
  *	non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc)
+ * @NL80211_RATE_INFO_320_MHZ_WIDTH: 320 MHz bitrate
+ * @NL80211_RATE_INFO_EHT_MCS: EHT MCS index (u8, 0-15)
+ * @NL80211_RATE_INFO_EHT_NSS: EHT NSS value (u8, 1-8)
+ * @NL80211_RATE_INFO_EHT_GI: EHT guard interval identifier
+ *	(u8, see &enum nl80211_eht_gi)
+ * @NL80211_RATE_INFO_EHT_RU_ALLOC: EHT RU allocation, if not present then
+ *	non-OFDMA was used (u8, see &enum nl80211_eht_ru_alloc)
  * @__NL80211_RATE_INFO_AFTER_LAST: internal use
  */
 enum nl80211_rate_info {
@@ -3452,6 +3509,11 @@ enum nl80211_rate_info {
 	NL80211_RATE_INFO_HE_GI,
 	NL80211_RATE_INFO_HE_DCM,
 	NL80211_RATE_INFO_HE_RU_ALLOC,
+	NL80211_RATE_INFO_320_MHZ_WIDTH,
+	NL80211_RATE_INFO_EHT_MCS,
+	NL80211_RATE_INFO_EHT_NSS,
+	NL80211_RATE_INFO_EHT_GI,
+	NL80211_RATE_INFO_EHT_RU_ALLOC,
 
 	/* keep last */
 	__NL80211_RATE_INFO_AFTER_LAST,
-- 
cgit v1.2.3


From c2b3d7699fb0ce66538b829af43970acc2f89060 Mon Sep 17 00:00:00 2001
From: Sriram R <quic_srirrama@quicinc.com>
Date: Mon, 14 Feb 2022 17:29:56 +0100
Subject: nl80211: add support for 320MHz channel limitation

Add support to advertise drivers or regulatory limitations on 320 MHz
channels to userspace.

Signed-off-by: Sriram R <quic_srirrama@quicinc.com>
Co-authored-by: Karthikeyan Periyasamy <quic_periyasia@quicinc.com>
Signed-off-by: Karthikeyan Periyasamy <quic_periyasia@quicinc.com>
Co-authored-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/1640163883-12696-6-git-send-email-quic_vjakkam@quicinc.com
Link: https://lore.kernel.org/r/20220214163009.175289-3-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d0ba70ea5d04..6a338dafcd07 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3997,6 +3997,8 @@ enum nl80211_wmm_rule {
  *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed
  *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel
+ *	as the primary or any of the secondary channels isn't possible
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -4033,6 +4035,7 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_4MHZ,
 	NL80211_FREQUENCY_ATTR_8MHZ,
 	NL80211_FREQUENCY_ATTR_16MHZ,
+	NL80211_FREQUENCY_ATTR_NO_320MHZ,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -4231,6 +4234,7 @@ enum nl80211_sched_scan_match_attr {
  * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed
  * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed
  * @NL80211_RRF_NO_HE: HE operation not allowed
+ * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed
  */
 enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_OFDM		= 1<<0,
@@ -4249,6 +4253,7 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_80MHZ		= 1<<15,
 	NL80211_RRF_NO_160MHZ		= 1<<16,
 	NL80211_RRF_NO_HE		= 1<<17,
+	NL80211_RRF_NO_320MHZ		= 1<<18,
 };
 
 #define NL80211_RRF_PASSIVE_SCAN	NL80211_RRF_NO_IR
-- 
cgit v1.2.3


From 31846b657857e6a73d982604f36a34710d98902c Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 14 Feb 2022 17:29:57 +0100
Subject: cfg80211: add NO-EHT flag to regulatory

This may be necessary in some cases, add a flag and propagate
it, just like the NO-HE that already exists.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
[split off from a combined 320/no-EHT patch]
Link: https://lore.kernel.org/r/20220214173004.dbb85a7b86bb.Ifc1e2daac51c1cc5f895ccfb79faf5eaec3950ec@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6a338dafcd07..baf6433c0119 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3999,6 +3999,8 @@ enum nl80211_wmm_rule {
  *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel
  *	as the primary or any of the secondary channels isn't possible
+ * @NL80211_FREQUENCY_ATTR_NO_EHT: EHT operation is not allowed on this channel
+ *	in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -4036,6 +4038,7 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_8MHZ,
 	NL80211_FREQUENCY_ATTR_16MHZ,
 	NL80211_FREQUENCY_ATTR_NO_320MHZ,
+	NL80211_FREQUENCY_ATTR_NO_EHT,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From ea05fd3581d32a0f1098657005c7a9b763798fe8 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 14 Feb 2022 17:29:58 +0100
Subject: cfg80211: Support configuration of station EHT capabilities

Add attributes and some code bits to support userspace passing
in EHT capabilities of stations.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Link: https://lore.kernel.org/r/20220214173004.ecf0b3ff9627.Icb4a5f2ec7b41d9008ac4cfc16c59baeb84793d3@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index baf6433c0119..98ed52663d6b 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -11,7 +11,7 @@
  * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
  * Copyright 2008 Colin McCabe <colin@cozybit.com>
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -2659,6 +2659,10 @@ enum nl80211_commands {
  *	enumerated in &enum nl80211_ap_settings_flags. This attribute shall be
  *	used with %NL80211_CMD_START_AP request.
  *
+ * @NL80211_ATTR_EHT_CAPABILITY: EHT Capability information element (from
+ *	association request when used with NL80211_CMD_NEW_STATION). Can be set
+ *	only if %NL80211_STA_FLAG_WME is set.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3169,6 +3173,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_AP_SETTINGS_FLAGS,
 
+	NL80211_ATTR_EHT_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3224,6 +3230,8 @@ enum nl80211_attrs {
 #define NL80211_HE_MAX_CAPABILITY_LEN           54
 #define NL80211_MAX_NR_CIPHER_SUITES		5
 #define NL80211_MAX_NR_AKM_SUITES		2
+#define NL80211_EHT_MIN_CAPABILITY_LEN          13
+#define NL80211_EHT_MAX_CAPABILITY_LEN          51
 
 #define NL80211_MIN_REMAIN_ON_CHANNEL_TIME	10
 
-- 
cgit v1.2.3


From 79aa0367385ceaf5351ea77ea1fb66136739ea9d Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Tue, 15 Feb 2022 18:54:38 -0500
Subject: drm/amdkfd: Replace zero-length array with flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:
https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays

CC: Changcheng Deng <deng.changcheng@zte.com.cn>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 6e4268f5e482..baec5a41de3e 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -673,7 +673,7 @@ struct kfd_ioctl_svm_args {
 	__u32 op;
 	__u32 nattr;
 	/* Variable length array of attributes */
-	struct kfd_ioctl_svm_attribute attrs[0];
+	struct kfd_ioctl_svm_attribute attrs[];
 };
 
 /**
-- 
cgit v1.2.3


From 169adc2b6b3c5e86391921117b4ab3aaeb3c6ee1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 10 Feb 2022 11:11:26 +0900
Subject: android/binder.h: add linux/android/binder(fs).h to UAPI compile-test
 coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

linux/android/binder.h and linux/android/binderfs.h are currently
excluded from the UAPI compile-test because of the errors like follows:

    HDRTEST usr/include/linux/android/binder.h
  In file included from <command-line>:
  ./usr/include/linux/android/binder.h:291:9: error: unknown type name ‘pid_t’
    291 |         pid_t           sender_pid;
        |         ^~~~~
  ./usr/include/linux/android/binder.h:292:9: error: unknown type name ‘uid_t’
    292 |         uid_t           sender_euid;
        |         ^~~~~

The errors can be fixed by replacing {pid,uid}_t with __kernel_{pid,uid}_t.

Then, remove the no-header-test entries from user/include/Makefile.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/linux/android/binder.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 3246f2c74696..11157fae8a8e 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -288,8 +288,8 @@ struct binder_transaction_data {
 
 	/* General information about the transaction. */
 	__u32	        flags;
-	pid_t		sender_pid;
-	uid_t		sender_euid;
+	__kernel_pid_t	sender_pid;
+	__kernel_uid_t	sender_euid;
 	binder_size_t	data_size;	/* number of bytes of data */
 	binder_size_t	offsets_size;	/* number of bytes of offsets */
 
-- 
cgit v1.2.3


From cbf2820341297b9aed0f846aba35556e94569210 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 10 Feb 2022 11:11:27 +0900
Subject: fsmap.h: add linux/fsmap.h to UAPI compile-test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

linux/fsmap.h is currently excluded from the UAPI compile-test because
of the error like follows:

    HDRTEST usr/include/linux/fsmap.h
  In file included from <command-line>:
  ./usr/include/linux/fsmap.h:72:19: error: unknown type name ‘size_t’
     72 | static __inline__ size_t
        |                   ^~~~~~

The error can be fixed by replacing size_t with __kernel_size_t.

Then, remove the no-header-test entry from user/include/Makefile.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/linux/fsmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h
index 91fd519a3f7d..c690d17f1d07 100644
--- a/include/uapi/linux/fsmap.h
+++ b/include/uapi/linux/fsmap.h
@@ -69,7 +69,7 @@ struct fsmap_head {
 };
 
 /* Size of an fsmap_head with room for nr records. */
-static inline size_t
+static inline __kernel_size_t
 fsmap_sizeof(
 	unsigned int	nr)
 {
-- 
cgit v1.2.3


From 8b4bca21c2c0cb3b5adb80985830a81d4e5d7081 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 10 Feb 2022 11:11:28 +0900
Subject: kexec.h: add linux/kexec.h to UAPI compile-test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

linux/kexec.h is currently excluded from the UAPI compile-test because
of the errors like follows:

    HDRTEST usr/include/linux/kexec.h
  In file included from <command-line>:
  ./usr/include/linux/kexec.h:56:9: error: unknown type name ‘size_t’
     56 |         size_t bufsz;
        |         ^~~~~~
  ./usr/include/linux/kexec.h:58:9: error: unknown type name ‘size_t’
     58 |         size_t memsz;
        |         ^~~~~~

The errors can be fixed by replacing size_t with __kernel_size_t.

Then, remove the no-header-test entry from user/include/Makefile.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/linux/kexec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 778dc191c265..fb7e2ef60825 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -54,9 +54,9 @@
  */
 struct kexec_segment {
 	const void *buf;
-	size_t bufsz;
+	__kernel_size_t bufsz;
 	const void *mem;
-	size_t memsz;
+	__kernel_size_t memsz;
 };
 
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 2a5c0fdc70cd653741e910e92ffeb2fa7376db07 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 10 Feb 2022 11:11:29 +0900
Subject: reiserfs_xattr.h: add linux/reiserfs_xattr.h to UAPI compile-test
 coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

linux/reiserfs_xattr.h is currently excluded from the UAPI compile-test
because of the error like follows:

    HDRTEST usr/include/linux/reiserfs_xattr.h
  In file included from <command-line>:
  ./usr/include/linux/reiserfs_xattr.h:22:9: error: unknown type name ‘size_t’
     22 |         size_t length;
        |         ^~~~~~

The error can be fixed by replacing size_t with __kernel_size_t.

Then, remove the no-header-test entry from user/include/Makefile.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/linux/reiserfs_xattr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h
index 28f10842f047..503ad018ce5b 100644
--- a/include/uapi/linux/reiserfs_xattr.h
+++ b/include/uapi/linux/reiserfs_xattr.h
@@ -19,7 +19,7 @@ struct reiserfs_xattr_header {
 struct reiserfs_security_handle {
 	const char *name;
 	void *value;
-	size_t length;
+	__kernel_size_t length;
 };
 
 #endif  /*  _LINUX_REISERFS_XATTR_H  */
-- 
cgit v1.2.3


From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 24 Jan 2022 18:29:52 +0100
Subject: media: omap3isp: Use struct_group() for memcpy() region

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).

FWIW, I think this code is totally broken on 64-bit (which appears to
not be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:

omap3isp_stat_request_statistics_time32(...)
    struct omap3isp_stat_data data64;
    ...
    omap3isp_stat_request_statistics(stat, &data64);

int omap3isp_stat_request_statistics(struct ispstat *stat,
                                     struct omap3isp_stat_data *data)
    ...
    buf = isp_stat_buf_get(stat, data);

static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
                                               struct omap3isp_stat_data *data)
...
    if (buf->buf_size > data->buf_size) {
            ...
            return ERR_PTR(-EINVAL);
    }
    ...
    rval = copy_to_user(data->buf,
                        buf->virt_addr,
                        buf->buf_size);

Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.

Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor

Cc: Arnd Bergmann <arnd@arndb.de>
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable@vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/omap3isp.h | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
  * struct omap3isp_stat_data - Statistic data sent to or received from user
  * @ts: Timestamp of returned framestats.
  * @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
  * @frame_number: Frame number of requested stats.
  * @cur_frame: Current frame number being processed.
  * @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
 	struct timeval ts;
 #endif
 	void __user *buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 
 #ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
 		__s32	tv_usec;
 	} ts;
 	__u32 buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 #endif
 
-- 
cgit v1.2.3


From 6de74d1069b821e96460d0fc2edfc35785db04fb Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Wed, 9 Feb 2022 08:11:10 -0800
Subject: hv_utils: Add comment about max VMbus packet size in VSS driver

The VSS driver allocates a VMbus receive buffer significantly
larger than sizeof(hv_vss_msg), with no explanation. To help
prevent future mistakes, add a #define and comment about why
this is done.

No functional change.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1644423070-75125-1-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/uapi/linux/hyperv.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index daf82a230c0e..aaa502a7bff4 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -90,6 +90,17 @@ struct hv_vss_check_dm_info {
 	__u32 flags;
 } __attribute__((packed));
 
+/*
+ * struct hv_vss_msg encodes the fields that the Linux VSS
+ * driver accesses. However, FREEZE messages from Hyper-V contain
+ * additional LUN information that Linux doesn't use and are not
+ * represented in struct hv_vss_msg. A received FREEZE message may
+ * be as large as 6,260 bytes, so the driver must allocate at least
+ * that much space, not sizeof(struct hv_vss_msg). Other messages
+ * such as AUTO_RECOVER may be as large as 12,500 bytes. However,
+ * because the Linux VSS driver responds that it doesn't support
+ * auto-recovery, it should not receive such messages.
+ */
 struct hv_vss_msg {
 	union {
 		struct hv_vss_hdr vss_hdr;
-- 
cgit v1.2.3


From 47f0bd5032106469827cf56c8b45bb9101112105 Mon Sep 17 00:00:00 2001
From: Jacques de Laval <Jacques.De.Laval@westermo.com>
Date: Thu, 17 Feb 2022 16:02:02 +0100
Subject: net: Add new protocol attribute to IP addresses

This patch adds a new protocol attribute to IPv4 and IPv6 addresses.
Inspiration was taken from the protocol attribute of routes. User space
applications like iproute2 can set/get the protocol with the Netlink API.

The attribute is stored as an 8-bit unsigned integer.

The protocol attribute is set by kernel for these categories:

- IPv4 and IPv6 loopback addresses
- IPv6 addresses generated from router announcements
- IPv6 link local addresses

User space may pass custom protocols, not defined by the kernel.

Grouping addresses on their origin is useful in scenarios where you want
to distinguish between addresses based on who added them, e.g. kernel
vs. user space.

Tagging addresses with a string label is an existing feature that could be
used as a solution. Unfortunately the max length of a label is
15 characters, and for compatibility reasons the label must be prefixed
with the name of the device followed by a colon. Since device names also
have a max length of 15 characters, only -1 characters is guaranteed to be
available for any origin tag, which is not that much.

A reference implementation of user space setting and getting protocols
is available for iproute2:

https://github.com/westermo/iproute2/commit/9a6ea18bd79f47f293e5edc7780f315ea42ff540

Signed-off-by: Jacques de Laval <Jacques.De.Laval@westermo.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220217150202.80802-1-Jacques.De.Laval@westermo.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_addr.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index dfcf3ce0097f..1c392dd95a5e 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -33,8 +33,9 @@ enum {
 	IFA_CACHEINFO,
 	IFA_MULTICAST,
 	IFA_FLAGS,
-	IFA_RT_PRIORITY,  /* u32, priority/metric for prefix route */
+	IFA_RT_PRIORITY,	/* u32, priority/metric for prefix route */
 	IFA_TARGET_NETNSID,
+	IFA_PROTO,		/* u8, address protocol */
 	__IFA_MAX,
 };
 
@@ -69,4 +70,10 @@ struct ifa_cacheinfo {
 #define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
 #endif
 
+/* ifa_proto */
+#define IFAPROT_UNSPEC		0
+#define IFAPROT_KERNEL_LO	1	/* loopback */
+#define IFAPROT_KERNEL_RA	2	/* set by kernel from router announcement */
+#define IFAPROT_KERNEL_LL	3	/* link-local set by kernel */
+
 #endif
-- 
cgit v1.2.3


From 4b340a5a726dafba15b366c4009aa0a8f77631ac Mon Sep 17 00:00:00 2001
From: Mobashshera Rasool <mobash.rasool.linux@gmail.com>
Date: Thu, 17 Feb 2022 07:46:40 +0000
Subject: net: ip6mr: add support for passing full packet on wrong mif

This patch adds support for MRT6MSG_WRMIFWHOLE which is used to pass
full packet and real vif id when the incoming interface is wrong.
While the RP and FHR are setting up state we need to be sending the
registers encapsulated with all the data inside otherwise we lose it.
The RP then decapsulates it and forwards it to the interested parties.
Currently with WRONGMIF we can only be sending empty register packets
and will lose that data.
This behaviour can be enabled by using MRT_PIM with
val == MRT6MSG_WRMIFWHOLE. This doesn't prevent MRT6MSG_WRONGMIF from
happening, it happens in addition to it, also it is controlled by the same
throttling parameters as WRONGMIF (i.e. 1 packet per 3 seconds currently).
Both messages are generated to keep backwards compatibily and avoid
breaking someone who was enabling MRT_PIM with val == 4, since any
positive val is accepted and treated the same.

Signed-off-by: Mobashshera Rasool <mobash.rasool.linux@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mroute6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index a1fd6173e2db..1d90c21a6251 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -134,6 +134,7 @@ struct mrt6msg {
 #define MRT6MSG_NOCACHE		1
 #define MRT6MSG_WRONGMIF	2
 #define MRT6MSG_WHOLEPKT	3		/* used for use level encap */
+#define MRT6MSG_WRMIFWHOLE	4		/* For PIM Register and assert processing */
 	__u8		im6_mbz;		/* must be zero		   */
 	__u8		im6_msgtype;		/* what type of message    */
 	__u16		im6_mif;		/* mif rec'd on		   */
-- 
cgit v1.2.3


From a1dc6308865df719efb2a2f8a5f0f5979602d267 Mon Sep 17 00:00:00 2001
From: Amitay Isaacs <amitay@ozlabs.org>
Date: Fri, 21 Jan 2022 16:08:16 +1030
Subject: fsi: sbefifo: Implement FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl

FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl sets the read timeout (in
seconds) for the response received by sbefifo device from sbe.  The
timeout affects only the read operation on current sbefifo device fd.

Certain SBE operations can take long time to complete and the default
timeout of 10 seconds might not be sufficient to start receiving
response from SBE.  In such cases, allow the timeout to be set to the
maximum of 120 seconds.

The kernel does not contain the definition of the various SBE
operations, so we must expose an interface to userspace to set the
timeout for the given operation.

Signed-off-by: Amitay Isaacs <amitay@ozlabs.org>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20220121053816.82253-3-joel@jms.id.au
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
 include/uapi/linux/fsi.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fsi.h b/include/uapi/linux/fsi.h
index da577ecd90e7..b2f1977378c7 100644
--- a/include/uapi/linux/fsi.h
+++ b/include/uapi/linux/fsi.h
@@ -55,4 +55,18 @@ struct scom_access {
 #define FSI_SCOM_WRITE	_IOWR('s', 0x02, struct scom_access)
 #define FSI_SCOM_RESET	_IOW('s', 0x03, __u32)
 
+/*
+ * /dev/sbefifo* ioctl interface
+ */
+
+/**
+ * FSI_SBEFIFO_READ_TIMEOUT sets the read timeout for response from SBE.
+ *
+ * The read timeout is specified in seconds.  The minimum value of read
+ * timeout is 10 seconds (default) and the maximum value of read timeout is
+ * 120 seconds.  A read timeout of 0 will reset the value to the default of
+ * (10 seconds).
+ */
+#define FSI_SBEFIFO_READ_TIMEOUT_SECONDS	_IOW('s', 0x00, __u32)
+
 #endif /* _UAPI_LINUX_FSI_H */
-- 
cgit v1.2.3


From 129e3c1bab24d27d0fa6e505a472345a92d7a2b0 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 21 Feb 2022 13:54:57 +0800
Subject: bonding: add new option ns_ip6_target

This patch add a new bonding option ns_ip6_target, which correspond
to the arp_ip_target. With this we set IPv6 targets and send IPv6 NS
request to determine the health of the link.

For other related options like the validation, we still use
arp_validate, and will change to ns_validate later.

Note: the sysfs configuration support was removed based on
https://lore.kernel.org/netdev/8863.1645071997@famine

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6218f93f5c1a..e1ba2d51b717 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -860,6 +860,7 @@ enum {
 	IFLA_BOND_PEER_NOTIF_DELAY,
 	IFLA_BOND_AD_LACP_ACTIVE,
 	IFLA_BOND_MISSED_MAX,
+	IFLA_BOND_NS_IP6_TARGET,
 	__IFLA_BOND_MAX,
 };
 
-- 
cgit v1.2.3


From d43583b890e7cb0078d13d056753a56602b92406 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 21 Feb 2022 15:35:23 +0000
Subject: KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest

PSCI v1.1 introduces the optional SYSTEM_RESET2 call, which allows the
caller to provide a vendor-specific "reset type" and "cookie" to request
a particular form of reset or shutdown.

Expose this call to the guest and handle it in the same way as PSCI
SYSTEM_RESET, along with some basic range checking on the type argument.

Cc: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220221153524.15397-3-will@kernel.org
---
 include/uapi/linux/psci.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
index 2fcad1dd0b0e..2bf93c0d6354 100644
--- a/include/uapi/linux/psci.h
+++ b/include/uapi/linux/psci.h
@@ -82,6 +82,10 @@
 #define PSCI_0_2_TOS_UP_NO_MIGRATE		1
 #define PSCI_0_2_TOS_MP				2
 
+/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */
+#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET	0
+#define PSCI_1_1_RESET_TYPE_VENDOR_START	0x80000000U
+
 /* PSCI version decoding (independent of PSCI version) */
 #define PSCI_VERSION_MAJOR_SHIFT		16
 #define PSCI_VERSION_MINOR_MASK			\
-- 
cgit v1.2.3


From a603ca60cebff8589882427a67f870ed946b3fc8 Mon Sep 17 00:00:00 2001
From: Zev Weiss <zev@bewilderbeest.net>
Date: Thu, 10 Feb 2022 16:42:03 -0800
Subject: serial: 8250_aspeed_vuart: add PORT_ASPEED_VUART port type

Commit 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to
set up register mapping") fixed a bug that had, as a side-effect,
prevented the 8250_aspeed_vuart driver from enabling the VUART's
FIFOs.  However, fixing that (and hence enabling the FIFOs) has in
turn revealed what appears to be a hardware bug in the ASPEED VUART in
which the host-side THRE bit doesn't get if the BMC-side receive FIFO
trigger level is set to anything but one byte.  This causes problems
for polled-mode writes from the host -- for example, Linux kernel
console writes proceed at a glacial pace (less than 100 bytes per
second) because the write path waits for a 10ms timeout to expire
after every character instead of being able to continue on to the next
character upon seeing THRE asserted.  (GRUB behaves similarly.)

As a workaround, introduce a new port type for the ASPEED VUART that's
identical to PORT_16550A as it had previously been using, but with
UART_FCR_R_TRIG_00 instead to set the receive FIFO trigger level to
one byte, which (experimentally) seems to avoid the problematic THRE
behavior.

Fixes: 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping")
Tested-by: Konstantin Aladyshev <aladyshev22@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Zev Weiss <zev@bewilderbeest.net>
Link: https://lore.kernel.org/r/20220211004203.14915-1-zev@bewilderbeest.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index c4042dcfdc0c..8885e69178bd 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -68,6 +68,9 @@
 /* NVIDIA Tegra Combined UART */
 #define PORT_TEGRA_TCU	41
 
+/* ASPEED AST2x00 virtual UART */
+#define PORT_ASPEED_VUART	42
+
 /* Intel EG20 */
 #define PORT_PCH_8LINE	44
 #define PORT_PCH_2LINE	45
-- 
cgit v1.2.3


From a1a5cfe70cd29a59a9a85290dfe95ed1c8df1193 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Mon, 14 Feb 2022 09:38:06 +0200
Subject: iio: introduce mag_referenced

Some accelerometers that support activity and inactivity
events also support a referenced mode, in which the
gravitational acceleration is taken as a point of
reference before comparing the acceleration to the
specified activity and inactivity magnitude.

For example, in the case of the ADXL367, for activity
detection, the formula is:

abs(acceleration - reference) > magnitude

Add a new event type that makes this behavior clear.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20220214073810.781016-2-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/uapi/linux/iio/types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index 48c13147c0a8..472cead10d8d 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -104,6 +104,7 @@ enum iio_event_type {
 	IIO_EV_TYPE_THRESH_ADAPTIVE,
 	IIO_EV_TYPE_MAG_ADAPTIVE,
 	IIO_EV_TYPE_CHANGE,
+	IIO_EV_TYPE_MAG_REFERENCED,
 };
 
 enum iio_event_direction {
-- 
cgit v1.2.3


From 0fbb4d93b38bce1f8235aacfa37e90ad8f011473 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Thu, 17 Feb 2022 23:40:32 -0500
Subject: dm: add dm_submit_bio_remap interface

Where possible, switch from early bio-based IO accounting (at the time
DM clones each incoming bio) to late IO accounting just before each
remapped bio is issued to underlying device via submit_bio_noacct().

Allows more precise bio-based IO accounting for DM targets that use
their own workqueues to perform additional processing of each bio in
conjunction with their DM_MAPIO_SUBMITTED return from their map
function. When a target is updated to use dm_submit_bio_remap() they
must also set ti->accounts_remapped_io to true.

Use xchg() in start_io_acct(), as suggested by Mikulas, to ensure each
IO is only started once.  The xchg race only happens if
__send_duplicate_bios() sends multiple bios -- that case is reflected
via tio->is_duplicate_bio.  Given the niche nature of this race, it is
best to avoid any xchg performance penalty for normal IO.

For IO that was never submitted with dm_bio_submit_remap(), but the
target completes the clone with bio_endio, accounting is started then
ended and pending_io counter decremented.

Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/uapi/linux/dm-ioctl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index c12ce30b52df..2e9550fef90f 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -286,9 +286,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	45
+#define DM_VERSION_MINOR	46
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2021-03-22)"
+#define DM_VERSION_EXTRA	"-ioctl (2022-02-22)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From a21d9a670d81103db7f788de1a4a4a6e4b891a0b Mon Sep 17 00:00:00 2001
From: Hans Schultz <schultz.hans@gmail.com>
Date: Wed, 23 Feb 2022 11:16:46 +0100
Subject: net: bridge: Add support for bridge port in locked mode

In a 802.1X scenario, clients connected to a bridge port shall not
be allowed to have traffic forwarded until fully authenticated.
A static fdb entry of the clients MAC address for the bridge port
unlocks the client and allows bidirectional communication.

This scenario is facilitated with setting the bridge port in locked
mode, which is also supported by various switchcore chipsets.

Signed-off-by: Hans Schultz <schultz.hans+netdev@gmail.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e1ba2d51b717..be09d2ad4b5d 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -537,6 +537,7 @@ enum {
 	IFLA_BRPORT_MRP_IN_OPEN,
 	IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
 	IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
+	IFLA_BRPORT_LOCKED,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
-- 
cgit v1.2.3


From 1241e329ce2e1f5b1039fd356b75867b29721ad2 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Wed, 23 Feb 2022 00:09:12 +0530
Subject: ethtool: add support to set/get completion queue event size

Add support to set completion queue event size via ethtool -G
parameter and get it via ethtool -g parameter.

~ # ./ethtool -G eth0 cqe-size 512
~ # ./ethtool -g eth0
Ring parameters for eth0:
Pre-set maximums:
RX:             1048576
RX Mini:        n/a
RX Jumbo:       n/a
TX:             1048576
Current hardware settings:
RX:             256
RX Mini:        n/a
RX Jumbo:       n/a
TX:             4096
RX Buf Len:             2048
CQE Size:                128

Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool_netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 417d4280d7b5..979850221b8d 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -337,6 +337,7 @@ enum {
 	ETHTOOL_A_RINGS_TX,				/* u32 */
 	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
 	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,			/* u8 */
+	ETHTOOL_A_RINGS_CQE_SIZE,			/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_RINGS_CNT,
-- 
cgit v1.2.3


From 28a3f0601727d521a1c6cce62ecbcb7402a9e4f5 Mon Sep 17 00:00:00 2001
From: Toms Atteka <cpp.code.lv@gmail.com>
Date: Wed, 23 Feb 2022 16:54:09 -0800
Subject: net: openvswitch: IPv6: Add IPv6 extension header support

This change adds a new OpenFlow field OFPXMT_OFB_IPV6_EXTHDR and
packets can be filtered using ipv6_ext flag.

Signed-off-by: Toms Atteka <cpp.code.lv@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 150bcff49b1c..9d1710f20505 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -351,6 +351,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
 	OVS_KEY_ATTR_NSH,       /* Nested set of ovs_nsh_key_* */
+	OVS_KEY_ATTR_IPV6_EXTHDRS,  /* struct ovs_key_ipv6_exthdr */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
@@ -430,6 +431,11 @@ struct ovs_key_ipv6 {
 	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
 };
 
+/* separate structure to support backward compatibility with older user space */
+struct ovs_key_ipv6_exthdrs {
+	__u16  hdrs;
+};
+
 struct ovs_key_tcp {
 	__be16 tcp_src;
 	__be16 tcp_dst;
-- 
cgit v1.2.3


From ba7bb663f5547ef474c98df99a97bb4a13c5715f Mon Sep 17 00:00:00 2001
From: David Dunn <daviddunn@google.com>
Date: Wed, 23 Feb 2022 22:57:41 +0000
Subject: KVM: x86: Provide per VM capability for disabling PMU virtualization

Add a new capability, KVM_CAP_PMU_CAPABILITY, that takes a bitmask of
settings/features to allow userspace to configure PMU virtualization on
a per-VM basis.  For now, support a single flag, KVM_PMU_CAP_DISABLE,
to allow disabling PMU virtualization for a VM even when KVM is configured
with enable_pmu=true a module level.

To keep KVM simple, disallow changing VM's PMU configuration after vCPUs
have been created.

Signed-off-by: David Dunn <daviddunn@google.com>
Message-Id: <20220223225743.2703915-2-daviddunn@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a02bbf8fd0f6..d2f1efc3aa35 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1142,6 +1142,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SYS_ATTRIBUTES 209
 #define KVM_CAP_PPC_AIL_MODE_3 210
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
+#define KVM_CAP_PMU_CAPABILITY 212
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1978,6 +1979,8 @@ struct kvm_dirty_gfn {
 #define KVM_BUS_LOCK_DETECTION_OFF             (1 << 0)
 #define KVM_BUS_LOCK_DETECTION_EXIT            (1 << 1)
 
+#define KVM_PMU_CAP_DISABLE                    (1 << 0)
+
 /**
  * struct kvm_stats_header - Header of per vm/vcpu binary statistics data.
  * @flags: Some extra information for header, always 0 for now.
-- 
cgit v1.2.3


From 43245eca6e670ebf65908b549641c1460a9cc944 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 2 Feb 2022 17:55:02 -0500
Subject: NFSv4.1 support for NFS4_RESULT_PRESERVER_UNLINKED

In 4.1+, the server is allowed to set a flag
NFS4_RESULT_PRESERVE_UNLINKED in reply to the OPEN, that tells
the client that it does not need to do a silly rename of an
opened file when it's being removed.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/uapi/linux/nfs4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 800bb0ffa6e6..1d2043708bf1 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -45,6 +45,7 @@
 
 #define NFS4_OPEN_RESULT_CONFIRM		0x0002
 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX		0x0004
+#define NFS4_OPEN_RESULT_PRESERVE_UNLINKED	0x0008
 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK	0x0020
 
 #define NFS4_SHARE_ACCESS_MASK	0x000F
-- 
cgit v1.2.3


From 9e8d5470325f25bed7d33f9faaae6d5e4f313650 Mon Sep 17 00:00:00 2001
From: Hammer Hsieh <hammerh0314@gmail.com>
Date: Tue, 22 Feb 2022 17:36:03 +0800
Subject: serial: sunplus-uart: Add Sunplus SoC UART Driver

Add Sunplus SoC UART Driver.
SP7021 UART block contains 5 UARTs.
There are UART0~4 that supported in SP7021, the features list as below.
Support Full-duplex communication.
Support data packet length configurable.
Support stop bit number configurable.
Support force break condition.
Support baud rate configurable.
Support error detection and report.
Support RXD Noise Rejection Vote configurable.

UART0 pinout only support TX/RX two pins.
UART1 to UART4 pinout support TX/RX/CTS/RTS four pins.
Normally UART0 used for kernel console, also can be used for normal uart.
Command line set "console=ttySUP0,115200", SUP means Sunplus Uart Port.
UART driver probe will create path named "/dev/ttySUPx".

https://sunplus.atlassian.net/wiki/spaces/doc/pages/1873412290/13.+Universal+Asynchronous+Receiver+Transmitter+UART

Signed-off-by: Hammer Hsieh <hammerh0314@gmail.com>
Link: https://lore.kernel.org/r/1645522563-17183-3-git-send-email-hammerh0314@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/serial_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 8885e69178bd..6faf502b7860 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -277,4 +277,7 @@
 /* Freescale LINFlexD UART */
 #define PORT_LINFLEXUART	122
 
+/* Sunplus UART */
+#define PORT_SUNPLUS	123
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3


From c2faf737abfb10f88f2d2612d573e9edc3c42c37 Mon Sep 17 00:00:00 2001
From: Max Staudt <max@enpas.org>
Date: Fri, 11 Feb 2022 15:10:36 +0100
Subject: tty: Reserve ldisc 29 for development purposes

It's handy to have an ldisc number free for out-of-tree testing. This
way, a new ldisc can be developed on any running system, without having
to recompile the kernel just to define a new number.

This is the highest number (and also the last one) available under the
old numbering scheme, so let's reserve it before it's too late.

From now on, every new ldisc upstreamed will have to increment NR_LDISCS
in lockstep with its addition to the table in tty.h.

Signed-off-by: Max Staudt <max@enpas.org>
Link: https://lore.kernel.org/r/20220211141036.6403-1-max@enpas.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/tty.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h
index a58deb3061eb..9d0f06bfbac3 100644
--- a/include/uapi/linux/tty.h
+++ b/include/uapi/linux/tty.h
@@ -6,8 +6,6 @@
  * 'tty.h' defines some structures used by tty_io.c and some defines.
  */
 
-#define NR_LDISCS		30
-
 /* line disciplines */
 #define N_TTY		0
 #define N_SLIP		1
@@ -39,5 +37,9 @@
 #define N_SPEAKUP	26	/* Speakup communication with synths */
 #define N_NULL		27	/* Null ldisc used for error handling */
 #define N_MCTP		28	/* MCTP-over-serial */
+#define N_DEVELOPMENT	29	/* Manual out-of-tree testing */
+
+/* Always the newest line discipline + 1 */
+#define NR_LDISCS	30
 
 #endif /* _UAPI_LINUX_TTY_H */
-- 
cgit v1.2.3


From fad278388e01e3658a356118bed8ee2c2408d280 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 24 May 2021 21:15:15 -0700
Subject: media: omap3isp: Use struct_group() for memcpy() region

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).

FWIW, I think this code is totally broken on 64-bit (which appears to
not be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:

omap3isp_stat_request_statistics_time32(...)
    struct omap3isp_stat_data data64;
    ...
    omap3isp_stat_request_statistics(stat, &data64);

int omap3isp_stat_request_statistics(struct ispstat *stat,
                                     struct omap3isp_stat_data *data)
    ...
    buf = isp_stat_buf_get(stat, data);

static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
                                               struct omap3isp_stat_data *data)
...
    if (buf->buf_size > data->buf_size) {
            ...
            return ERR_PTR(-EINVAL);
    }
    ...
    rval = copy_to_user(data->buf,
                        buf->virt_addr,
                        buf->buf_size);

Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.

Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: linux-media@vger.kernel.org
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable@vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/omap3isp.h | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
  * struct omap3isp_stat_data - Statistic data sent to or received from user
  * @ts: Timestamp of returned framestats.
  * @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
  * @frame_number: Frame number of requested stats.
  * @cur_frame: Current frame number being processed.
  * @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
 	struct timeval ts;
 #endif
 	void __user *buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 
 #ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
 		__s32	tv_usec;
 	} ts;
 	__u32 buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 #endif
 
-- 
cgit v1.2.3


From 89377bc1975c2993bde4a498a3a4e5817ac0ae2c Mon Sep 17 00:00:00 2001
From: Kanchan Joshi <joshi.k@samsung.com>
Date: Thu, 10 Feb 2022 11:07:55 +0530
Subject: nvme: add vectored-io support for user-passthrough

Add a new NVME_IOCTL_IO64_CMD_VEC ioctl that works like the existing
NVME_IOCTL_IO64_CMD ioctl except that it takes and array of iovecs
and thus supports vectored I/O.

  - cmd.addr is base address of user iovec array
  - cmd.vec_cnt is count of iovec array elements

This patch does not include vectored-variant for admin-commands as most
of them are light on buffers and likely to have low invocation frequency.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/uapi/linux/nvme_ioctl.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index d99b5a772698..b2e43185e3b5 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 {
 	__u64	metadata;
 	__u64	addr;
 	__u32	metadata_len;
-	__u32	data_len;
+	union {
+		__u32	data_len; /* for non-vectored io */
+		__u32	vec_cnt; /* for vectored io */
+	};
 	__u32	cdw10;
 	__u32	cdw11;
 	__u32	cdw12;
@@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 {
 #define NVME_IOCTL_RESCAN	_IO('N', 0x46)
 #define NVME_IOCTL_ADMIN64_CMD	_IOWR('N', 0x47, struct nvme_passthru_cmd64)
 #define NVME_IOCTL_IO64_CMD	_IOWR('N', 0x48, struct nvme_passthru_cmd64)
+#define NVME_IOCTL_IO64_CMD_VEC	_IOWR('N', 0x49, struct nvme_passthru_cmd64)
 
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
-- 
cgit v1.2.3


From 0c9f178778919bb500443f340647b44227778fb2 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 16 Feb 2022 10:52:42 +0800
Subject: iommu: Remove guest pasid related interfaces and definitions

The guest pasid related uapi interfaces and definitions are not referenced
anywhere in the tree. We've also reached a consensus to replace them with
a new iommufd design. Remove them to avoid dead code.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20220216025249.3459465-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/uapi/linux/iommu.h | 181 ---------------------------------------------
 1 file changed, 181 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 59178fc229ca..65d8b0234f69 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -158,185 +158,4 @@ struct iommu_page_response {
 	__u32	code;
 };
 
-/* defines the granularity of the invalidation */
-enum iommu_inv_granularity {
-	IOMMU_INV_GRANU_DOMAIN,	/* domain-selective invalidation */
-	IOMMU_INV_GRANU_PASID,	/* PASID-selective invalidation */
-	IOMMU_INV_GRANU_ADDR,	/* page-selective invalidation */
-	IOMMU_INV_GRANU_NR,	/* number of invalidation granularities */
-};
-
-/**
- * struct iommu_inv_addr_info - Address Selective Invalidation Structure
- *
- * @flags: indicates the granularity of the address-selective invalidation
- * - If the PASID bit is set, the @pasid field is populated and the invalidation
- *   relates to cache entries tagged with this PASID and matching the address
- *   range.
- * - If ARCHID bit is set, @archid is populated and the invalidation relates
- *   to cache entries tagged with this architecture specific ID and matching
- *   the address range.
- * - Both PASID and ARCHID can be set as they may tag different caches.
- * - If neither PASID or ARCHID is set, global addr invalidation applies.
- * - The LEAF flag indicates whether only the leaf PTE caching needs to be
- *   invalidated and other paging structure caches can be preserved.
- * @pasid: process address space ID
- * @archid: architecture-specific ID
- * @addr: first stage/level input address
- * @granule_size: page/block size of the mapping in bytes
- * @nb_granules: number of contiguous granules to be invalidated
- */
-struct iommu_inv_addr_info {
-#define IOMMU_INV_ADDR_FLAGS_PASID	(1 << 0)
-#define IOMMU_INV_ADDR_FLAGS_ARCHID	(1 << 1)
-#define IOMMU_INV_ADDR_FLAGS_LEAF	(1 << 2)
-	__u32	flags;
-	__u32	archid;
-	__u64	pasid;
-	__u64	addr;
-	__u64	granule_size;
-	__u64	nb_granules;
-};
-
-/**
- * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure
- *
- * @flags: indicates the granularity of the PASID-selective invalidation
- * - If the PASID bit is set, the @pasid field is populated and the invalidation
- *   relates to cache entries tagged with this PASID and matching the address
- *   range.
- * - If the ARCHID bit is set, the @archid is populated and the invalidation
- *   relates to cache entries tagged with this architecture specific ID and
- *   matching the address range.
- * - Both PASID and ARCHID can be set as they may tag different caches.
- * - At least one of PASID or ARCHID must be set.
- * @pasid: process address space ID
- * @archid: architecture-specific ID
- */
-struct iommu_inv_pasid_info {
-#define IOMMU_INV_PASID_FLAGS_PASID	(1 << 0)
-#define IOMMU_INV_PASID_FLAGS_ARCHID	(1 << 1)
-	__u32	flags;
-	__u32	archid;
-	__u64	pasid;
-};
-
-/**
- * struct iommu_cache_invalidate_info - First level/stage invalidation
- *     information
- * @argsz: User filled size of this data
- * @version: API version of this structure
- * @cache: bitfield that allows to select which caches to invalidate
- * @granularity: defines the lowest granularity used for the invalidation:
- *     domain > PASID > addr
- * @padding: reserved for future use (should be zero)
- * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID
- * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR
- *
- * Not all the combinations of cache/granularity are valid:
- *
- * +--------------+---------------+---------------+---------------+
- * | type /       |   DEV_IOTLB   |     IOTLB     |      PASID    |
- * | granularity  |               |               |      cache    |
- * +==============+===============+===============+===============+
- * | DOMAIN       |       N/A     |       Y       |       Y       |
- * +--------------+---------------+---------------+---------------+
- * | PASID        |       Y       |       Y       |       Y       |
- * +--------------+---------------+---------------+---------------+
- * | ADDR         |       Y       |       Y       |       N/A     |
- * +--------------+---------------+---------------+---------------+
- *
- * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than
- * @version and @cache.
- *
- * If multiple cache types are invalidated simultaneously, they all
- * must support the used granularity.
- */
-struct iommu_cache_invalidate_info {
-	__u32	argsz;
-#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1
-	__u32	version;
-/* IOMMU paging structure cache */
-#define IOMMU_CACHE_INV_TYPE_IOTLB	(1 << 0) /* IOMMU IOTLB */
-#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB	(1 << 1) /* Device IOTLB */
-#define IOMMU_CACHE_INV_TYPE_PASID	(1 << 2) /* PASID cache */
-#define IOMMU_CACHE_INV_TYPE_NR		(3)
-	__u8	cache;
-	__u8	granularity;
-	__u8	padding[6];
-	union {
-		struct iommu_inv_pasid_info pasid_info;
-		struct iommu_inv_addr_info addr_info;
-	} granu;
-};
-
-/**
- * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest
- * SVA binding.
- *
- * @flags:	VT-d PASID table entry attributes
- * @pat:	Page attribute table data to compute effective memory type
- * @emt:	Extended memory type
- *
- * Only guest vIOMMU selectable and effective options are passed down to
- * the host IOMMU.
- */
-struct iommu_gpasid_bind_data_vtd {
-#define IOMMU_SVA_VTD_GPASID_SRE	(1 << 0) /* supervisor request */
-#define IOMMU_SVA_VTD_GPASID_EAFE	(1 << 1) /* extended access enable */
-#define IOMMU_SVA_VTD_GPASID_PCD	(1 << 2) /* page-level cache disable */
-#define IOMMU_SVA_VTD_GPASID_PWT	(1 << 3) /* page-level write through */
-#define IOMMU_SVA_VTD_GPASID_EMTE	(1 << 4) /* extended mem type enable */
-#define IOMMU_SVA_VTD_GPASID_CD		(1 << 5) /* PASID-level cache disable */
-#define IOMMU_SVA_VTD_GPASID_WPE	(1 << 6) /* Write protect enable */
-#define IOMMU_SVA_VTD_GPASID_LAST	(1 << 7)
-	__u64 flags;
-	__u32 pat;
-	__u32 emt;
-};
-
-#define IOMMU_SVA_VTD_GPASID_MTS_MASK	(IOMMU_SVA_VTD_GPASID_CD | \
-					 IOMMU_SVA_VTD_GPASID_EMTE | \
-					 IOMMU_SVA_VTD_GPASID_PCD |  \
-					 IOMMU_SVA_VTD_GPASID_PWT)
-
-/**
- * struct iommu_gpasid_bind_data - Information about device and guest PASID binding
- * @argsz:	User filled size of this data
- * @version:	Version of this data structure
- * @format:	PASID table entry format
- * @flags:	Additional information on guest bind request
- * @gpgd:	Guest page directory base of the guest mm to bind
- * @hpasid:	Process address space ID used for the guest mm in host IOMMU
- * @gpasid:	Process address space ID used for the guest mm in guest IOMMU
- * @addr_width:	Guest virtual address width
- * @padding:	Reserved for future use (should be zero)
- * @vtd:	Intel VT-d specific data
- *
- * Guest to host PASID mapping can be an identity or non-identity, where guest
- * has its own PASID space. For non-identify mapping, guest to host PASID lookup
- * is needed when VM programs guest PASID into an assigned device. VMM may
- * trap such PASID programming then request host IOMMU driver to convert guest
- * PASID to host PASID based on this bind data.
- */
-struct iommu_gpasid_bind_data {
-	__u32 argsz;
-#define IOMMU_GPASID_BIND_VERSION_1	1
-	__u32 version;
-#define IOMMU_PASID_FORMAT_INTEL_VTD	1
-#define IOMMU_PASID_FORMAT_LAST		2
-	__u32 format;
-	__u32 addr_width;
-#define IOMMU_SVA_GPASID_VAL	(1 << 0) /* guest PASID valid */
-	__u64 flags;
-	__u64 gpgd;
-	__u64 hpasid;
-	__u64 gpasid;
-	__u8  padding[8];
-	/* Vendor specific data */
-	union {
-		struct iommu_gpasid_bind_data_vtd vtd;
-	} vendor;
-};
-
 #endif /* _UAPI_IOMMU_H */
-- 
cgit v1.2.3


From 7b8135f4df98b155b23754b6065c157861e268f1 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@nvidia.com>
Date: Tue, 1 Mar 2022 05:04:34 +0000
Subject: rtnetlink: add new rtm tunnel api for tunnel id filtering

This patch adds new rtm tunnel msg and api for tunnel id
filtering in dst_metadata devices. First dst_metadata
device to use the api is vxlan driver with AF_BRIDGE
family.

This and later changes add ability in vxlan driver to do
tunnel id filtering (or vni filtering) on dst_metadata
devices. This is similar to vlan api in the vlan filtering bridge.

this patch includes selinux nlmsg_route_perms support for RTM_*TUNNEL
api from Benjamin Poirier.

Signed-off-by: Roopa Prabhu <roopa@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h   | 26 ++++++++++++++++++++++++++
 include/uapi/linux/rtnetlink.h |  9 +++++++++
 2 files changed, 35 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index be09d2ad4b5d..3dfc9ff2ec9b 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -713,7 +713,32 @@ enum ipvlan_mode {
 #define IPVLAN_F_PRIVATE	0x01
 #define IPVLAN_F_VEPA		0x02
 
+/* Tunnel RTM header */
+struct tunnel_msg {
+	__u8 family;
+	__u8 reserved1;
+	__u16 reserved2;
+	__u32 ifindex;
+};
+
 /* VXLAN section */
+enum {
+	VXLAN_VNIFILTER_ENTRY_UNSPEC,
+	VXLAN_VNIFILTER_ENTRY_START,
+	VXLAN_VNIFILTER_ENTRY_END,
+	VXLAN_VNIFILTER_ENTRY_GROUP,
+	VXLAN_VNIFILTER_ENTRY_GROUP6,
+	__VXLAN_VNIFILTER_ENTRY_MAX
+};
+#define VXLAN_VNIFILTER_ENTRY_MAX	(__VXLAN_VNIFILTER_ENTRY_MAX - 1)
+
+enum {
+	VXLAN_VNIFILTER_UNSPEC,
+	VXLAN_VNIFILTER_ENTRY,
+	__VXLAN_VNIFILTER_MAX
+};
+#define VXLAN_VNIFILTER_MAX	(__VXLAN_VNIFILTER_MAX - 1)
+
 enum {
 	IFLA_VXLAN_UNSPEC,
 	IFLA_VXLAN_ID,
@@ -745,6 +770,7 @@ enum {
 	IFLA_VXLAN_GPE,
 	IFLA_VXLAN_TTL_INHERIT,
 	IFLA_VXLAN_DF,
+	IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 93d934cc4613..0970cb4b1b88 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -185,6 +185,13 @@ enum {
 	RTM_GETNEXTHOPBUCKET,
 #define RTM_GETNEXTHOPBUCKET	RTM_GETNEXTHOPBUCKET
 
+	RTM_NEWTUNNEL = 120,
+#define RTM_NEWTUNNEL	RTM_NEWTUNNEL
+	RTM_DELTUNNEL,
+#define RTM_DELTUNNEL	RTM_DELTUNNEL
+	RTM_GETTUNNEL,
+#define RTM_GETTUNNEL	RTM_GETTUNNEL
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
@@ -756,6 +763,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_BRVLAN		RTNLGRP_BRVLAN
 	RTNLGRP_MCTP_IFADDR,
 #define RTNLGRP_MCTP_IFADDR	RTNLGRP_MCTP_IFADDR
+	RTNLGRP_TUNNEL,
+#define RTNLGRP_TUNNEL		RTNLGRP_TUNNEL
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
-- 
cgit v1.2.3


From 445b2f36bb4efb81f064e931f28b9ec19f114355 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 1 Mar 2022 05:04:39 +0000
Subject: drivers: vxlan: vnifilter: add support for stats dumping

Add support for VXLAN vni filter entries' stats dumping

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3dfc9ff2ec9b..e315e53125f4 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -716,18 +716,41 @@ enum ipvlan_mode {
 /* Tunnel RTM header */
 struct tunnel_msg {
 	__u8 family;
-	__u8 reserved1;
+	__u8 flags;
 	__u16 reserved2;
 	__u32 ifindex;
 };
 
 /* VXLAN section */
+
+/* include statistics in the dump */
+#define TUNNEL_MSG_FLAG_STATS	0x01
+
+#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS
+
+/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */
+enum {
+	VNIFILTER_ENTRY_STATS_UNSPEC,
+	VNIFILTER_ENTRY_STATS_RX_BYTES,
+	VNIFILTER_ENTRY_STATS_RX_PKTS,
+	VNIFILTER_ENTRY_STATS_RX_DROPS,
+	VNIFILTER_ENTRY_STATS_RX_ERRORS,
+	VNIFILTER_ENTRY_STATS_TX_BYTES,
+	VNIFILTER_ENTRY_STATS_TX_PKTS,
+	VNIFILTER_ENTRY_STATS_TX_DROPS,
+	VNIFILTER_ENTRY_STATS_TX_ERRORS,
+	VNIFILTER_ENTRY_STATS_PAD,
+	__VNIFILTER_ENTRY_STATS_MAX
+};
+#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1)
+
 enum {
 	VXLAN_VNIFILTER_ENTRY_UNSPEC,
 	VXLAN_VNIFILTER_ENTRY_START,
 	VXLAN_VNIFILTER_ENTRY_END,
 	VXLAN_VNIFILTER_ENTRY_GROUP,
 	VXLAN_VNIFILTER_ENTRY_GROUP6,
+	VXLAN_VNIFILTER_ENTRY_STATS,
 	__VXLAN_VNIFILTER_ENTRY_MAX
 };
 #define VXLAN_VNIFILTER_ENTRY_MAX	(__VXLAN_VNIFILTER_ENTRY_MAX - 1)
-- 
cgit v1.2.3


From cedd3614e5d9c80908099c19f8716714ce0610b1 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 24 Feb 2022 11:06:54 +0530
Subject: perf: Add irq and exception return branch types

This expands generic branch type classification by adding two more entries
there in i.e irq and exception return. Also updates the x86 implementation
to process X86_BR_IRET and X86_BR_IRQ records as appropriate. This changes
branch types reported to user space on x86 platform but it should not be a
problem. The possible scenarios and impacts are enumerated here.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1645681014-3346-1-git-send-email-anshuman.khandual@arm.com
---
 include/uapi/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1b65042ab1db..7dc71768749d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -251,6 +251,8 @@ enum {
 	PERF_BR_SYSRET		= 8,	/* syscall return */
 	PERF_BR_COND_CALL	= 9,	/* conditional function call */
 	PERF_BR_COND_RET	= 10,	/* conditional function return */
+	PERF_BR_ERET		= 11,	/* exception return */
+	PERF_BR_IRQ		= 12,	/* irq */
 	PERF_BR_MAX,
 };
 
-- 
cgit v1.2.3


From 4f0bfdfd8323e5b461fc1042143a1097dba9fced Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 23 Feb 2022 19:34:31 -0800
Subject: ELF: Properly redefine PT_GNU_* in terms of PT_LOOS

The PT_GNU_* program header types are actually offsets from PT_LOOS,
so redefine them as such, reorder them, and add the missing PT_GNU_RELRO.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/elf.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 61bf4774b8f2..6438d55529bf 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -35,10 +35,11 @@ typedef __s64	Elf64_Sxword;
 #define PT_HIOS    0x6fffffff      /* OS-specific */
 #define PT_LOPROC  0x70000000
 #define PT_HIPROC  0x7fffffff
-#define PT_GNU_EH_FRAME		0x6474e550
-#define PT_GNU_PROPERTY		0x6474e553
-
+#define PT_GNU_EH_FRAME	(PT_LOOS + 0x474e550)
 #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO	(PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY	(PT_LOOS + 0x474e553)
+
 
 /*
  * Extended Numbering
-- 
cgit v1.2.3


From dd0ca255f3d27a1bb43d8e9529fb3645f9a341a3 Mon Sep 17 00:00:00 2001
From: Daniel Braunwarth <daniel@braunwarth.dev>
Date: Mon, 28 Feb 2022 14:30:28 +0100
Subject: if_ether.h: add PROFINET Ethertype

Add the Ethertype for PROFINET protocol.

Signed-off-by: Daniel Braunwarth <daniel@braunwarth.dev>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index c0c2f3ed5729..4f4ed35a16db 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -86,6 +86,7 @@
 					 * over Ethernet
 					 */
 #define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
+#define ETH_P_PROFINET	0x8892		/* PROFINET			*/
 #define ETH_P_REALTEK	0x8899          /* Multiple proprietary protocols */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
-- 
cgit v1.2.3


From cd73cda742fbe1f33ed7306c7a01aa64f4e6ebd5 Mon Sep 17 00:00:00 2001
From: Daniel Braunwarth <daniel@braunwarth.dev>
Date: Mon, 28 Feb 2022 14:30:29 +0100
Subject: if_ether.h: add EtherCAT Ethertype

Add the Ethertype for EtherCAT protocol.

Signed-off-by: Daniel Braunwarth <daniel@braunwarth.dev>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 4f4ed35a16db..1d0bccc3fa54 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -89,6 +89,7 @@
 #define ETH_P_PROFINET	0x8892		/* PROFINET			*/
 #define ETH_P_REALTEK	0x8899          /* Multiple proprietary protocols */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
+#define ETH_P_ETHERCAT	0x88A4		/* EtherCAT			*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
 #define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
 #define ETH_P_PREAUTH	0x88C7		/* 802.11 Preauthentication */
-- 
cgit v1.2.3


From d58b8a99cbb84c1eb3b3613d23c1a328695a9455 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Fri, 25 Feb 2022 12:33:51 -0500
Subject: drm/amdkfd: Add SMI add event helper

To remove duplicate code, unify event message format and simplify new
event add in the following patches.

Use KFD_SMI_EVENT_MSG_SIZE to define msg size, the same size will be
used in user space to alloc the msg receive buffer.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index baec5a41de3e..b40687bf1014 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -463,6 +463,7 @@ enum kfd_smi_event {
 };
 
 #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
+#define KFD_SMI_EVENT_MSG_SIZE	96
 
 struct kfd_ioctl_smi_events_args {
 	__u32 gpuid;	/* to KFD */
-- 
cgit v1.2.3


From 46efc97b73060823fdc18103a5e317a8327d44e1 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 2 Mar 2022 18:31:17 +0200
Subject: net: rtnetlink: RTM_GETSTATS: Allow filtering inside nests

The filter_mask field of RTM_GETSTATS header determines which top-level
attributes should be included in the netlink response. This saves
processing time by only including the bits that the user cares about
instead of always dumping everything. This is doubly important for
HW-backed statistics that would typically require a trip to the device to
fetch the stats.

So far there was only one HW-backed stat suite per attribute. However,
IFLA_STATS_LINK_OFFLOAD_XSTATS is a nest, and will gain a new stat suite in
the following patches. It would therefore be advantageous to be able to
filter within that nest, and select just one or the other HW-backed
statistics suite.

Extend rtnetlink so that RTM_GETSTATS permits attributes in the payload.
The scheme is as follows:

    - RTM_GETSTATS
	- struct if_stats_msg
	- attr nest IFLA_STATS_GET_FILTERS
	    - attr IFLA_STATS_LINK_OFFLOAD_XSTATS
		- u32 filter_mask

This scheme reuses the existing enumerators by nesting them in a dedicated
context attribute. This is covered by policies as usual, therefore a
gradual opt-in is possible. Currently only IFLA_STATS_LINK_OFFLOAD_XSTATS
nest has filtering enabled, because for the SW counters the issue does not
seem to be that important.

rtnl_offload_xstats_get_size() and _fill() are extended to observe the
requested filters.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e315e53125f4..4d62ea6e1288 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1207,6 +1207,16 @@ enum {
 
 #define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR - 1))
 
+enum {
+	IFLA_STATS_GETSET_UNSPEC,
+	IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with
+				 * a filter mask for the corresponding group.
+				 */
+	__IFLA_STATS_GETSET_MAX,
+};
+
+#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1)
+
 /* These are embedded into IFLA_STATS_LINK_XSTATS:
  * [IFLA_STATS_LINK_XSTATS]
  * -> [LINK_XSTATS_TYPE_xxx]
-- 
cgit v1.2.3


From 9309f97aef6d8250bb484dabeac925c3a7c57716 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 2 Mar 2022 18:31:20 +0200
Subject: net: dev: Add hardware stats support

Offloading switch device drivers may be able to collect statistics of the
traffic taking place in the HW datapath that pertains to a certain soft
netdevice, such as VLAN. Add the necessary infrastructure to allow exposing
these statistics to the offloaded netdevice in question. The API was shaped
by the following considerations:

- Collection of HW statistics is not free: there may be a finite number of
  counters, and the act of counting may have a performance impact. It is
  therefore necessary to allow toggling whether HW counting should be done
  for any particular SW netdevice.

- As the drivers are loaded and removed, a particular device may get
  offloaded and unoffloaded again. At the same time, the statistics values
  need to stay monotonic (modulo the eventual 64-bit wraparound),
  increasing only to reflect traffic measured in the device.

  To that end, the netdevice keeps around a lazily-allocated copy of struct
  rtnl_link_stats64. Device drivers then contribute to the values kept
  therein at various points. Even as the driver goes away, the struct stays
  around to maintain the statistics values.

- Different HW devices may be able to count different things. The
  motivation behind this patch in particular is exposure of HW counters on
  Nvidia Spectrum switches, where the only practical approach to counting
  traffic on offloaded soft netdevices currently is to use router interface
  counters, and count L3 traffic. Correspondingly that is the statistics
  suite added in this patch.

  Other devices may be able to measure different kinds of traffic, and for
  that reason, the APIs are built to allow uniform access to different
  statistics suites.

- Because soft netdevices and offloading drivers are only loosely bound, a
  netdevice uses a notifier chain to communicate with the drivers. Several
  new notifiers, NETDEV_OFFLOAD_XSTATS_*, have been added to carry messages
  to the offloading drivers.

- Devices can have various conditions for when a particular counter is
  available. As the device is configured and reconfigured, the device
  offload may become or cease being suitable for counter binding. A
  netdevice can use a notifier type NETDEV_OFFLOAD_XSTATS_REPORT_USED to
  ping offloading drivers and determine whether anyone currently implements
  a given statistics suite. This information can then be propagated to user
  space.

  When the driver decides to unoffload a netdevice, it can use a
  newly-added function, netdev_offload_xstats_report_delta(), to record
  outstanding collected statistics, before destroying the HW counter.

This patch adds a helper, call_netdevice_notifiers_info_robust(), for
dispatching a notifier with the possibility of unwind when one of the
consumers bails. Given the wish to eventually get rid of the global
notifier block altogether, this helper only invokes the per-netns notifier
block.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4d62ea6e1288..ef6a62a2e15d 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -245,6 +245,21 @@ struct rtnl_link_stats64 {
 	__u64	rx_nohandler;
 };
 
+/* Subset of link stats useful for in-HW collection. Meaning of the fields is as
+ * for struct rtnl_link_stats64.
+ */
+struct rtnl_hw_stats64 {
+	__u64	rx_packets;
+	__u64	tx_packets;
+	__u64	rx_bytes;
+	__u64	tx_bytes;
+	__u64	rx_errors;
+	__u64	tx_errors;
+	__u64	rx_dropped;
+	__u64	tx_dropped;
+	__u64	multicast;
+};
+
 /* The struct should be in sync with struct ifmap */
 struct rtnl_link_ifmap {
 	__u64	mem_start;
-- 
cgit v1.2.3


From 0e7788fd76222dba3229eada9162efab185923fc Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 2 Mar 2022 18:31:21 +0200
Subject: net: rtnetlink: Add UAPI for obtaining L3 offload xstats

Add a new IFLA_STATS_LINK_OFFLOAD_XSTATS child attribute,
IFLA_OFFLOAD_XSTATS_L3_STATS, to carry statistics for traffic that takes
place in a HW router.

The offloaded HW stats are designed to allow per-netdevice enablement and
disablement. Additionally, as a netdevice is configured, it may become or
cease being suitable for binding of a HW counter. Both of these aspects
need to be communicated to the userspace. To that end, add another child
attribute, IFLA_OFFLOAD_XSTATS_HW_S_INFO:

    - attr nest IFLA_OFFLOAD_XSTATS_HW_S_INFO
	- attr nest IFLA_OFFLOAD_XSTATS_L3_STATS
 	    - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST
	      - {0,1} as u8
 	    - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED
	      - {0,1} as u8

Thus this one attribute is a nest that can be used to carry information
about various types of HW statistics, and indexing is very simply done by
wrapping the information for a given statistics suite into the attribute
that carries the suite is the RTM_GETSTATS query. At the same time, because
_HW_S_INFO is nested directly below IFLA_STATS_LINK_OFFLOAD_XSTATS, it is
possible through filtering to request only the metadata about individual
statistics suites, without having to hit the HW to get the actual counters.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ef6a62a2e15d..b1031f481d2f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1249,10 +1249,21 @@ enum {
 enum {
 	IFLA_OFFLOAD_XSTATS_UNSPEC,
 	IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+	IFLA_OFFLOAD_XSTATS_HW_S_INFO,	/* HW stats info. A nest */
+	IFLA_OFFLOAD_XSTATS_L3_STATS,	/* struct rtnl_hw_stats64 */
 	__IFLA_OFFLOAD_XSTATS_MAX
 };
 #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
 
+enum {
+	IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC,
+	IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST,		/* u8 */
+	IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED,		/* u8 */
+	__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX,
+};
+#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \
+	(__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1)
+
 /* XDP section */
 
 #define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
-- 
cgit v1.2.3


From 03ba35667091337d8e632cf4b814f1c1b914609b Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 2 Mar 2022 18:31:22 +0200
Subject: net: rtnetlink: Add RTM_SETSTATS

The offloaded HW stats are designed to allow per-netdevice enablement and
disablement. These stats are only accessible through RTM_GETSTATS, and
therefore should be toggled by a RTM_SETSTATS message. Add it, and the
necessary skeleton handler.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 0970cb4b1b88..14462dc159fd 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -146,6 +146,8 @@ enum {
 #define RTM_NEWSTATS RTM_NEWSTATS
 	RTM_GETSTATS = 94,
 #define RTM_GETSTATS RTM_GETSTATS
+	RTM_SETSTATS,
+#define RTM_SETSTATS RTM_SETSTATS
 
 	RTM_NEWCACHEREPORT = 96,
 #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
-- 
cgit v1.2.3


From 5fd0b838efac16046509f7fb100455d0463b9687 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 2 Mar 2022 18:31:23 +0200
Subject: net: rtnetlink: Add UAPI toggle for IFLA_OFFLOAD_XSTATS_L3_STATS

The offloaded HW stats are designed to allow per-netdevice enablement and
disablement. Add an attribute, IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS,
which should be carried by the RTM_SETSTATS message, and expresses a desire
to toggle L3 offload xstats on or off.

As part of the above, add an exported function rtnl_offload_xstats_notify()
that drivers can use when they have installed or deinstalled the counters
backing the HW stats.

At this point, it is possible to enable, disable and query L3 offload
xstats on netdevices. (However there is no driver actually implementing
these.)

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h   | 1 +
 include/uapi/linux/rtnetlink.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b1031f481d2f..ddca20357e7e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1227,6 +1227,7 @@ enum {
 	IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with
 				 * a filter mask for the corresponding group.
 				 */
+	IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */
 	__IFLA_STATS_GETSET_MAX,
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 14462dc159fd..51530aade46e 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -767,6 +767,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_MCTP_IFADDR	RTNLGRP_MCTP_IFADDR
 	RTNLGRP_TUNNEL,
 #define RTNLGRP_TUNNEL		RTNLGRP_TUNNEL
+	RTNLGRP_STATS,
+#define RTNLGRP_STATS		RTNLGRP_STATS
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
-- 
cgit v1.2.3


From 115dcec65f61d53e25e1bed5e380468b30f98b14 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 24 Feb 2022 16:20:18 +0200
Subject: vfio: Define device migration protocol v2

Replace the existing region based migration protocol with an ioctl based
protocol. The two protocols have the same general semantic behaviors, but
the way the data is transported is changed.

This is the STOP_COPY portion of the new protocol, it defines the 5 states
for basic stop and copy migration and the protocol to move the migration
data in/out of the kernel.

Compared to the clarification of the v1 protocol Alex proposed:

https://lore.kernel.org/r/163909282574.728533.7460416142511440919.stgit@omen

This has a few deliberate functional differences:

 - ERROR arcs allow the device function to remain unchanged.

 - The protocol is not required to return to the original state on
   transition failure. Instead userspace can execute an unwind back to
   the original state, reset, or do something else without needing kernel
   support. This simplifies the kernel design and should userspace choose
   a policy like always reset, avoids doing useless work in the kernel
   on error handling paths.

 - PRE_COPY is made optional, userspace must discover it before using it.
   This reflects the fact that the majority of drivers we are aware of
   right now will not implement PRE_COPY.

 - segmentation is not part of the data stream protocol, the receiver
   does not have to reproduce the framing boundaries.

The hybrid FSM for the device_state is described as a Mealy machine by
documenting each of the arcs the driver is required to implement. Defining
the remaining set of old/new device_state transitions as 'combination
transitions' which are naturally defined as taking multiple FSM arcs along
the shortest path within the FSM's digraph allows a complete matrix of
transitions.

A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE is
defined to replace writing to the device_state field in the region. This
allows returning a brand new FD whenever the requested transition opens
a data transfer session.

The VFIO core code implements the new feature and provides a helper
function to the driver. Using the helper the driver only has to
implement 6 of the FSM arcs and the other combination transitions are
elaborated consistently from those arcs.

A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIGRATION is defined to
report the capability for migration and indicate which set of states and
arcs are supported by the device. The FSM provides a lot of flexibility to
make backwards compatible extensions but the VFIO_DEVICE_FEATURE also
allows for future breaking extensions for scenarios that cannot support
even the basic STOP_COPY requirements.

The VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE with the GET option (i.e.
VFIO_DEVICE_FEATURE_GET) can be used to read the current migration state
of the VFIO device.

Data transfer sessions are now carried over a file descriptor, instead of
the region. The FD functions for the lifetime of the data transfer
session. read() and write() transfer the data with normal Linux stream FD
semantics. This design allows future expansion to support poll(),
io_uring, and other performance optimizations.

The complicated mmap mode for data transfer is discarded as current qemu
doesn't take meaningful advantage of it, and the new qemu implementation
avoids substantially all the performance penalty of using a read() on the
region.

Link: https://lore.kernel.org/all/20220224142024.147653-10-yishaih@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/uapi/linux/vfio.h | 174 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 161 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ef33ea002b0b..22ed358c04c5 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -605,25 +605,25 @@ struct vfio_region_gfx_edid {
 
 struct vfio_device_migration_info {
 	__u32 device_state;         /* VFIO device state */
-#define VFIO_DEVICE_STATE_STOP      (0)
-#define VFIO_DEVICE_STATE_RUNNING   (1 << 0)
-#define VFIO_DEVICE_STATE_SAVING    (1 << 1)
-#define VFIO_DEVICE_STATE_RESUMING  (1 << 2)
-#define VFIO_DEVICE_STATE_MASK      (VFIO_DEVICE_STATE_RUNNING | \
-				     VFIO_DEVICE_STATE_SAVING |  \
-				     VFIO_DEVICE_STATE_RESUMING)
+#define VFIO_DEVICE_STATE_V1_STOP      (0)
+#define VFIO_DEVICE_STATE_V1_RUNNING   (1 << 0)
+#define VFIO_DEVICE_STATE_V1_SAVING    (1 << 1)
+#define VFIO_DEVICE_STATE_V1_RESUMING  (1 << 2)
+#define VFIO_DEVICE_STATE_MASK      (VFIO_DEVICE_STATE_V1_RUNNING | \
+				     VFIO_DEVICE_STATE_V1_SAVING |  \
+				     VFIO_DEVICE_STATE_V1_RESUMING)
 
 #define VFIO_DEVICE_STATE_VALID(state) \
-	(state & VFIO_DEVICE_STATE_RESUMING ? \
-	(state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)
+	(state & VFIO_DEVICE_STATE_V1_RESUMING ? \
+	(state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1)
 
 #define VFIO_DEVICE_STATE_IS_ERROR(state) \
-	((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
-					      VFIO_DEVICE_STATE_RESUMING))
+	((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \
+					      VFIO_DEVICE_STATE_V1_RESUMING))
 
 #define VFIO_DEVICE_STATE_SET_ERROR(state) \
-	((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \
-					     VFIO_DEVICE_STATE_RESUMING)
+	((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \
+					     VFIO_DEVICE_STATE_V1_RESUMING)
 
 	__u32 reserved;
 	__u64 pending_bytes;
@@ -1002,6 +1002,154 @@ struct vfio_device_feature {
  */
 #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN	(0)
 
+/*
+ * Indicates the device can support the migration API through
+ * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and
+ * ERROR states are always supported. Support for additional states is
+ * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be
+ * set.
+ *
+ * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and
+ * RESUMING are supported.
+ */
+struct vfio_device_feature_migration {
+	__aligned_u64 flags;
+#define VFIO_MIGRATION_STOP_COPY	(1 << 0)
+};
+#define VFIO_DEVICE_FEATURE_MIGRATION 1
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO
+ * device. The new state is supplied in device_state, see enum
+ * vfio_device_mig_state for details
+ *
+ * The kernel migration driver must fully transition the device to the new state
+ * value before the operation returns to the user.
+ *
+ * The kernel migration driver must not generate asynchronous device state
+ * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET
+ * ioctl as described above.
+ *
+ * If this function fails then current device_state may be the original
+ * operating state or some other state along the combination transition path.
+ * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt
+ * to return to the original state, or attempt to return to some other state
+ * such as RUNNING or STOP.
+ *
+ * If the new_state starts a new data transfer session then the FD associated
+ * with that session is returned in data_fd. The user is responsible to close
+ * this FD when it is finished. The user must consider the migration data stream
+ * carried over the FD to be opaque and must preserve the byte order of the
+ * stream. The user is not required to preserve buffer segmentation when writing
+ * the data stream during the RESUMING operation.
+ *
+ * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO
+ * device, data_fd will be -1.
+ */
+struct vfio_device_feature_mig_state {
+	__u32 device_state; /* From enum vfio_device_mig_state */
+	__s32 data_fd;
+};
+#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2
+
+/*
+ * The device migration Finite State Machine is described by the enum
+ * vfio_device_mig_state. Some of the FSM arcs will create a migration data
+ * transfer session by returning a FD, in this case the migration data will
+ * flow over the FD using read() and write() as discussed below.
+ *
+ * There are 5 states to support VFIO_MIGRATION_STOP_COPY:
+ *  RUNNING - The device is running normally
+ *  STOP - The device does not change the internal or external state
+ *  STOP_COPY - The device internal state can be read out
+ *  RESUMING - The device is stopped and is loading a new internal state
+ *  ERROR - The device has failed and must be reset
+ *
+ * The FSM takes actions on the arcs between FSM states. The driver implements
+ * the following behavior for the FSM arcs:
+ *
+ * RUNNING -> STOP
+ * STOP_COPY -> STOP
+ *   While in STOP the device must stop the operation of the device. The device
+ *   must not generate interrupts, DMA, or any other change to external state.
+ *   It must not change its internal state. When stopped the device and kernel
+ *   migration driver must accept and respond to interaction to support external
+ *   subsystems in the STOP state, for example PCI MSI-X and PCI config space.
+ *   Failure by the user to restrict device access while in STOP must not result
+ *   in error conditions outside the user context (ex. host system faults).
+ *
+ *   The STOP_COPY arc will terminate a data transfer session.
+ *
+ * RESUMING -> STOP
+ *   Leaving RESUMING terminates a data transfer session and indicates the
+ *   device should complete processing of the data delivered by write(). The
+ *   kernel migration driver should complete the incorporation of data written
+ *   to the data transfer FD into the device internal state and perform
+ *   final validity and consistency checking of the new device state. If the
+ *   user provided data is found to be incomplete, inconsistent, or otherwise
+ *   invalid, the migration driver must fail the SET_STATE ioctl and
+ *   optionally go to the ERROR state as described below.
+ *
+ *   While in STOP the device has the same behavior as other STOP states
+ *   described above.
+ *
+ *   To abort a RESUMING session the device must be reset.
+ *
+ * STOP -> RUNNING
+ *   While in RUNNING the device is fully operational, the device may generate
+ *   interrupts, DMA, respond to MMIO, all vfio device regions are functional,
+ *   and the device may advance its internal state.
+ *
+ * STOP -> STOP_COPY
+ *   This arc begin the process of saving the device state and will return a
+ *   new data_fd.
+ *
+ *   While in the STOP_COPY state the device has the same behavior as STOP
+ *   with the addition that the data transfers session continues to stream the
+ *   migration state. End of stream on the FD indicates the entire device
+ *   state has been transferred.
+ *
+ *   The user should take steps to restrict access to vfio device regions while
+ *   the device is in STOP_COPY or risk corruption of the device migration data
+ *   stream.
+ *
+ * STOP -> RESUMING
+ *   Entering the RESUMING state starts a process of restoring the device state
+ *   and will return a new data_fd. The data stream fed into the data_fd should
+ *   be taken from the data transfer output of a single FD during saving from
+ *   a compatible device. The migration driver may alter/reset the internal
+ *   device state for this arc if required to prepare the device to receive the
+ *   migration data.
+ *
+ * any -> ERROR
+ *   ERROR cannot be specified as a device state, however any transition request
+ *   can be failed with an errno return and may then move the device_state into
+ *   ERROR. In this case the device was unable to execute the requested arc and
+ *   was also unable to restore the device to any valid device_state.
+ *   To recover from ERROR VFIO_DEVICE_RESET must be used to return the
+ *   device_state back to RUNNING.
+ *
+ * The remaining possible transitions are interpreted as combinations of the
+ * above FSM arcs. As there are multiple paths through the FSM arcs the path
+ * should be selected based on the following rules:
+ *   - Select the shortest path.
+ * Refer to vfio_mig_get_next_state() for the result of the algorithm.
+ *
+ * The automatic transit through the FSM arcs that make up the combination
+ * transition is invisible to the user. When working with combination arcs the
+ * user may see any step along the path in the device_state if SET_STATE
+ * fails. When handling these types of errors users should anticipate future
+ * revisions of this protocol using new states and those states becoming
+ * visible in this case.
+ */
+enum vfio_device_mig_state {
+	VFIO_DEVICE_STATE_ERROR = 0,
+	VFIO_DEVICE_STATE_STOP = 1,
+	VFIO_DEVICE_STATE_RUNNING = 2,
+	VFIO_DEVICE_STATE_STOP_COPY = 3,
+	VFIO_DEVICE_STATE_RESUMING = 4,
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
cgit v1.2.3


From 8cb3d83b959be0631cd719b995c40c3cda21cd47 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 24 Feb 2022 16:20:19 +0200
Subject: vfio: Extend the device migration protocol with RUNNING_P2P

The RUNNING_P2P state is designed to support multiple devices in the same
VM that are doing P2P transactions between themselves. When in RUNNING_P2P
the device must be able to accept incoming P2P transactions but should not
generate outgoing P2P transactions.

As an optional extension to the mandatory states it is defined as
in between STOP and RUNNING:
   STOP -> RUNNING_P2P -> RUNNING -> RUNNING_P2P -> STOP

For drivers that are unable to support RUNNING_P2P the core code
silently merges RUNNING_P2P and RUNNING together. Unless driver support
is present, the new state cannot be used in SET_STATE.
Drivers that support this will be required to implement 4 FSM arcs
beyond the basic FSM. 2 of the basic FSM arcs become combination
transitions.

Compared to the v1 clarification, NDMA is redefined into FSM states and is
described in terms of the desired P2P quiescent behavior, noting that
halting all DMA is an acceptable implementation.

Link: https://lore.kernel.org/all/20220224142024.147653-11-yishaih@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/uapi/linux/vfio.h | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 22ed358c04c5..26a66f68371d 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1011,10 +1011,16 @@ struct vfio_device_feature {
  *
  * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and
  * RESUMING are supported.
+ *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
+ * is supported in addition to the STOP_COPY states.
+ *
+ * Other combinations of flags have behavior to be defined in the future.
  */
 struct vfio_device_feature_migration {
 	__aligned_u64 flags;
 #define VFIO_MIGRATION_STOP_COPY	(1 << 0)
+#define VFIO_MIGRATION_P2P		(1 << 1)
 };
 #define VFIO_DEVICE_FEATURE_MIGRATION 1
 
@@ -1065,10 +1071,13 @@ struct vfio_device_feature_mig_state {
  *  RESUMING - The device is stopped and is loading a new internal state
  *  ERROR - The device has failed and must be reset
  *
+ * And 1 optional state to support VFIO_MIGRATION_P2P:
+ *  RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA
+ *
  * The FSM takes actions on the arcs between FSM states. The driver implements
  * the following behavior for the FSM arcs:
  *
- * RUNNING -> STOP
+ * RUNNING_P2P -> STOP
  * STOP_COPY -> STOP
  *   While in STOP the device must stop the operation of the device. The device
  *   must not generate interrupts, DMA, or any other change to external state.
@@ -1095,11 +1104,16 @@ struct vfio_device_feature_mig_state {
  *
  *   To abort a RESUMING session the device must be reset.
  *
- * STOP -> RUNNING
+ * RUNNING_P2P -> RUNNING
  *   While in RUNNING the device is fully operational, the device may generate
  *   interrupts, DMA, respond to MMIO, all vfio device regions are functional,
  *   and the device may advance its internal state.
  *
+ * RUNNING -> RUNNING_P2P
+ * STOP -> RUNNING_P2P
+ *   While in RUNNING_P2P the device is partially running in the P2P quiescent
+ *   state defined below.
+ *
  * STOP -> STOP_COPY
  *   This arc begin the process of saving the device state and will return a
  *   new data_fd.
@@ -1129,6 +1143,18 @@ struct vfio_device_feature_mig_state {
  *   To recover from ERROR VFIO_DEVICE_RESET must be used to return the
  *   device_state back to RUNNING.
  *
+ * The optional peer to peer (P2P) quiescent state is intended to be a quiescent
+ * state for the device for the purposes of managing multiple devices within a
+ * user context where peer-to-peer DMA between devices may be active. The
+ * RUNNING_P2P states must prevent the device from initiating
+ * any new P2P DMA transactions. If the device can identify P2P transactions
+ * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration
+ * driver must complete any such outstanding operations prior to completing the
+ * FSM arc into a P2P state. For the purpose of specification the states
+ * behave as though the device was fully running if not supported. Like while in
+ * STOP or STOP_COPY the user must not touch the device, otherwise the state
+ * can be exited.
+ *
  * The remaining possible transitions are interpreted as combinations of the
  * above FSM arcs. As there are multiple paths through the FSM arcs the path
  * should be selected based on the following rules:
@@ -1141,6 +1167,11 @@ struct vfio_device_feature_mig_state {
  * fails. When handling these types of errors users should anticipate future
  * revisions of this protocol using new states and those states becoming
  * visible in this case.
+ *
+ * The optional states cannot be used with SET_STATE if the device does not
+ * support them. The user can discover if these states are supported by using
+ * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can
+ * avoid knowing about these optional states if the kernel driver supports them.
  */
 enum vfio_device_mig_state {
 	VFIO_DEVICE_STATE_ERROR = 0,
@@ -1148,6 +1179,7 @@ enum vfio_device_mig_state {
 	VFIO_DEVICE_STATE_RUNNING = 2,
 	VFIO_DEVICE_STATE_STOP_COPY = 3,
 	VFIO_DEVICE_STATE_RESUMING = 4,
+	VFIO_DEVICE_STATE_RUNNING_P2P = 5,
 };
 
 /* -------- API for Type1 VFIO IOMMU -------- */
-- 
cgit v1.2.3


From 0f3f9cd7f752f6e685e378620babc5e34af6fb9f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 24 Feb 2022 16:20:20 +0200
Subject: vfio: Remove migration protocol v1 documentation

v1 was never implemented and is replaced by v2.

The old uAPI documentation is removed from the header file.

The old uAPI definitions are still kept in the header file to ease
transition for userspace copying these headers. They will be fully
removed down the road.

Link: https://lore.kernel.org/all/20220224142024.147653-12-yishaih@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/uapi/linux/vfio.h | 200 +---------------------------------------------
 1 file changed, 2 insertions(+), 198 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 26a66f68371d..fea86061b44e 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -323,7 +323,7 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
 #define VFIO_REGION_TYPE_GFX                    (1)
 #define VFIO_REGION_TYPE_CCW			(2)
-#define VFIO_REGION_TYPE_MIGRATION              (3)
+#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED   (3)
 
 /* sub-types for VFIO_REGION_TYPE_PCI_* */
 
@@ -405,203 +405,7 @@ struct vfio_region_gfx_edid {
 #define VFIO_REGION_SUBTYPE_CCW_CRW		(3)
 
 /* sub-types for VFIO_REGION_TYPE_MIGRATION */
-#define VFIO_REGION_SUBTYPE_MIGRATION           (1)
-
-/*
- * The structure vfio_device_migration_info is placed at the 0th offset of
- * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
- * migration information. Field accesses from this structure are only supported
- * at their native width and alignment. Otherwise, the result is undefined and
- * vendor drivers should return an error.
- *
- * device_state: (read/write)
- *      - The user application writes to this field to inform the vendor driver
- *        about the device state to be transitioned to.
- *      - The vendor driver should take the necessary actions to change the
- *        device state. After successful transition to a given state, the
- *        vendor driver should return success on write(device_state, state)
- *        system call. If the device state transition fails, the vendor driver
- *        should return an appropriate -errno for the fault condition.
- *      - On the user application side, if the device state transition fails,
- *	  that is, if write(device_state, state) returns an error, read
- *	  device_state again to determine the current state of the device from
- *	  the vendor driver.
- *      - The vendor driver should return previous state of the device unless
- *        the vendor driver has encountered an internal error, in which case
- *        the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR.
- *      - The user application must use the device reset ioctl to recover the
- *        device from VFIO_DEVICE_STATE_ERROR state. If the device is
- *        indicated to be in a valid device state by reading device_state, the
- *        user application may attempt to transition the device to any valid
- *        state reachable from the current state or terminate itself.
- *
- *      device_state consists of 3 bits:
- *      - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear,
- *        it indicates the _STOP state. When the device state is changed to
- *        _STOP, driver should stop the device before write() returns.
- *      - If bit 1 is set, it indicates the _SAVING state, which means that the
- *        driver should start gathering device state information that will be
- *        provided to the VFIO user application to save the device's state.
- *      - If bit 2 is set, it indicates the _RESUMING state, which means that
- *        the driver should prepare to resume the device. Data provided through
- *        the migration region should be used to resume the device.
- *      Bits 3 - 31 are reserved for future use. To preserve them, the user
- *      application should perform a read-modify-write operation on this
- *      field when modifying the specified bits.
- *
- *  +------- _RESUMING
- *  |+------ _SAVING
- *  ||+----- _RUNNING
- *  |||
- *  000b => Device Stopped, not saving or resuming
- *  001b => Device running, which is the default state
- *  010b => Stop the device & save the device state, stop-and-copy state
- *  011b => Device running and save the device state, pre-copy state
- *  100b => Device stopped and the device state is resuming
- *  101b => Invalid state
- *  110b => Error state
- *  111b => Invalid state
- *
- * State transitions:
- *
- *              _RESUMING  _RUNNING    Pre-copy    Stop-and-copy   _STOP
- *                (100b)     (001b)     (011b)        (010b)       (000b)
- * 0. Running or default state
- *                             |
- *
- * 1. Normal Shutdown (optional)
- *                             |------------------------------------->|
- *
- * 2. Save the state or suspend
- *                             |------------------------->|---------->|
- *
- * 3. Save the state during live migration
- *                             |----------->|------------>|---------->|
- *
- * 4. Resuming
- *                  |<---------|
- *
- * 5. Resumed
- *                  |--------->|
- *
- * 0. Default state of VFIO device is _RUNNING when the user application starts.
- * 1. During normal shutdown of the user application, the user application may
- *    optionally change the VFIO device state from _RUNNING to _STOP. This
- *    transition is optional. The vendor driver must support this transition but
- *    must not require it.
- * 2. When the user application saves state or suspends the application, the
- *    device state transitions from _RUNNING to stop-and-copy and then to _STOP.
- *    On state transition from _RUNNING to stop-and-copy, driver must stop the
- *    device, save the device state and send it to the application through the
- *    migration region. The sequence to be followed for such transition is given
- *    below.
- * 3. In live migration of user application, the state transitions from _RUNNING
- *    to pre-copy, to stop-and-copy, and to _STOP.
- *    On state transition from _RUNNING to pre-copy, the driver should start
- *    gathering the device state while the application is still running and send
- *    the device state data to application through the migration region.
- *    On state transition from pre-copy to stop-and-copy, the driver must stop
- *    the device, save the device state and send it to the user application
- *    through the migration region.
- *    Vendor drivers must support the pre-copy state even for implementations
- *    where no data is provided to the user before the stop-and-copy state. The
- *    user must not be required to consume all migration data before the device
- *    transitions to a new state, including the stop-and-copy state.
- *    The sequence to be followed for above two transitions is given below.
- * 4. To start the resuming phase, the device state should be transitioned from
- *    the _RUNNING to the _RESUMING state.
- *    In the _RESUMING state, the driver should use the device state data
- *    received through the migration region to resume the device.
- * 5. After providing saved device data to the driver, the application should
- *    change the state from _RESUMING to _RUNNING.
- *
- * reserved:
- *      Reads on this field return zero and writes are ignored.
- *
- * pending_bytes: (read only)
- *      The number of pending bytes still to be migrated from the vendor driver.
- *
- * data_offset: (read only)
- *      The user application should read data_offset field from the migration
- *      region. The user application should read the device data from this
- *      offset within the migration region during the _SAVING state or write
- *      the device data during the _RESUMING state. See below for details of
- *      sequence to be followed.
- *
- * data_size: (read/write)
- *      The user application should read data_size to get the size in bytes of
- *      the data copied in the migration region during the _SAVING state and
- *      write the size in bytes of the data copied in the migration region
- *      during the _RESUMING state.
- *
- * The format of the migration region is as follows:
- *  ------------------------------------------------------------------
- * |vfio_device_migration_info|    data section                      |
- * |                          |     ///////////////////////////////  |
- * ------------------------------------------------------------------
- *   ^                              ^
- *  offset 0-trapped part        data_offset
- *
- * The structure vfio_device_migration_info is always followed by the data
- * section in the region, so data_offset will always be nonzero. The offset
- * from where the data is copied is decided by the kernel driver. The data
- * section can be trapped, mmapped, or partitioned, depending on how the kernel
- * driver defines the data section. The data section partition can be defined
- * as mapped by the sparse mmap capability. If mmapped, data_offset must be
- * page aligned, whereas initial section which contains the
- * vfio_device_migration_info structure, might not end at the offset, which is
- * page aligned. The user is not required to access through mmap regardless
- * of the capabilities of the region mmap.
- * The vendor driver should determine whether and how to partition the data
- * section. The vendor driver should return data_offset accordingly.
- *
- * The sequence to be followed while in pre-copy state and stop-and-copy state
- * is as follows:
- * a. Read pending_bytes, indicating the start of a new iteration to get device
- *    data. Repeated read on pending_bytes at this stage should have no side
- *    effects.
- *    If pending_bytes == 0, the user application should not iterate to get data
- *    for that device.
- *    If pending_bytes > 0, perform the following steps.
- * b. Read data_offset, indicating that the vendor driver should make data
- *    available through the data section. The vendor driver should return this
- *    read operation only after data is available from (region + data_offset)
- *    to (region + data_offset + data_size).
- * c. Read data_size, which is the amount of data in bytes available through
- *    the migration region.
- *    Read on data_offset and data_size should return the offset and size of
- *    the current buffer if the user application reads data_offset and
- *    data_size more than once here.
- * d. Read data_size bytes of data from (region + data_offset) from the
- *    migration region.
- * e. Process the data.
- * f. Read pending_bytes, which indicates that the data from the previous
- *    iteration has been read. If pending_bytes > 0, go to step b.
- *
- * The user application can transition from the _SAVING|_RUNNING
- * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the
- * number of pending bytes. The user application should iterate in _SAVING
- * (stop-and-copy) until pending_bytes is 0.
- *
- * The sequence to be followed while _RESUMING device state is as follows:
- * While data for this device is available, repeat the following steps:
- * a. Read data_offset from where the user application should write data.
- * b. Write migration data starting at the migration region + data_offset for
- *    the length determined by data_size from the migration source.
- * c. Write data_size, which indicates to the vendor driver that data is
- *    written in the migration region. Vendor driver must return this write
- *    operations on consuming data. Vendor driver should apply the
- *    user-provided migration region data to the device resume state.
- *
- * If an error occurs during the above sequences, the vendor driver can return
- * an error code for next read() or write() operation, which will terminate the
- * loop. The user application should then take the next necessary action, for
- * example, failing migration or terminating the user application.
- *
- * For the user application, data is opaque. The user application should write
- * data in the same order as the data is received and the data should be of
- * same transaction size at the source.
- */
+#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1)
 
 struct vfio_device_migration_info {
 	__u32 device_state;         /* VFIO device state */
-- 
cgit v1.2.3


From 8d21ec0e46ed6e39994accff8eb4f2be3d2e76b5 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 2 Mar 2022 11:56:34 -0800
Subject: bpf: Add __sk_buff->delivery_time_type and
 bpf_skb_set_skb_delivery_time()

* __sk_buff->delivery_time_type:
This patch adds __sk_buff->delivery_time_type.  It tells if the
delivery_time is stored in __sk_buff->tstamp or not.

It will be most useful for ingress to tell if the __sk_buff->tstamp
has the (rcv) timestamp or delivery_time.  If delivery_time_type
is 0 (BPF_SKB_DELIVERY_TIME_NONE), it has the (rcv) timestamp.

Two non-zero types are defined for the delivery_time_type,
BPF_SKB_DELIVERY_TIME_MONO and BPF_SKB_DELIVERY_TIME_UNSPEC.  For UNSPEC,
it can only happen in egress because only mono delivery_time can be
forwarded to ingress now.  The clock of UNSPEC delivery_time
can be deduced from the skb->sk->sk_clockid which is how
the sch_etf doing it also.

* Provide forwarded delivery_time to tc-bpf@ingress:
With the help of the new delivery_time_type, the tc-bpf has a way
to tell if the __sk_buff->tstamp has the (rcv) timestamp or
the delivery_time.  During bpf load time, the verifier will learn if
the bpf prog has accessed the new __sk_buff->delivery_time_type.
If it does, it means the tc-bpf@ingress is expecting the
skb->tstamp could have the delivery_time.  The kernel will then
read the skb->tstamp as-is during bpf insn rewrite without
checking the skb->mono_delivery_time.  This is done by adding a
new prog->delivery_time_access bit.  The same goes for
writing skb->tstamp.

* bpf_skb_set_delivery_time():
The bpf_skb_set_delivery_time() helper is added to allow setting both
delivery_time and the delivery_time_type at the same time.  If the
tc-bpf does not need to change the delivery_time_type, it can directly
write to the __sk_buff->tstamp as the existing tc-bpf has already been
doing.  It will be most useful at ingress to change the
__sk_buff->tstamp from the (rcv) timestamp to
a mono delivery_time and then bpf_redirect_*().

bpf only has mono clock helper (bpf_ktime_get_ns), and
the current known use case is the mono EDT for fq, and
only mono delivery time can be kept during forward now,
so bpf_skb_set_delivery_time() only supports setting
BPF_SKB_DELIVERY_TIME_MONO.  It can be extended later when use cases
come up and the forwarding path also supports other clock bases.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index afe3d0d7f5f2..4eebea830613 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5086,6 +5086,37 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure. On error
  *		*dst* buffer is zeroed out.
+ *
+ * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type)
+ *	Description
+ *		Set a *dtime* (delivery time) to the __sk_buff->tstamp and also
+ *		change the __sk_buff->delivery_time_type to *dtime_type*.
+ *
+ *		When setting a delivery time (non zero *dtime*) to
+ *		__sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type*
+ *		is supported.  It is the only delivery_time_type that will be
+ *		kept after bpf_redirect_*().
+ *
+ *		If there is no need to change the __sk_buff->delivery_time_type,
+ *		the delivery time can be directly written to __sk_buff->tstamp
+ *		instead.
+ *
+ *		*dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE
+ *		can be used to clear any delivery time stored in
+ *		__sk_buff->tstamp.
+ *
+ *		Only IPv4 and IPv6 skb->protocol are supported.
+ *
+ *		This function is most useful when it needs to set a
+ *		mono delivery time to __sk_buff->tstamp and then
+ *		bpf_redirect_*() to the egress of an iface.  For example,
+ *		changing the (rcv) timestamp in __sk_buff->tstamp at
+ *		ingress to a mono delivery time and then bpf_redirect_*()
+ *		to sch_fq@phy-dev.
+ *	Return
+ *		0 on success.
+ *		**-EINVAL** for invalid input
+ *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5280,6 +5311,7 @@ union bpf_attr {
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
+	FN(skb_set_delivery_time),      \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5469,6 +5501,12 @@ union {					\
 	__u64 :64;			\
 } __attribute__((aligned(8)))
 
+enum {
+	BPF_SKB_DELIVERY_TIME_NONE,
+	BPF_SKB_DELIVERY_TIME_UNSPEC,
+	BPF_SKB_DELIVERY_TIME_MONO,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -5509,7 +5547,8 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
-	__u32 :32;		/* Padding, future use. */
+	__u8  delivery_time_type;
+	__u32 :24;		/* Padding, future use. */
 	__u64 hwtstamp;
 };
 
-- 
cgit v1.2.3


From 51ef2be546e2e480e56fdb59fdeb9a4406e8d52e Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 17 Feb 2022 16:44:07 +0100
Subject: media: i2c: isl7998x: Add driver for Intersil ISL7998x

Add driver for the Intersil ISL7998x Analog to MIPI CSI-2/BT656 decoder.
This chip supports 1/2/4 analog video inputs and converts them into
1/2/4 VCs in MIPI CSI2 stream.

This driver currently supports ISL79987 and both 720x480 and 720x576
resolutions, however as per specification, all inputs must use the
same resolution and standard. The only supported pixel format is now
YUYV/YUV422. The chip should support RGB565 on the CSI2 as well, but
this is currently unsupported.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Rob Herring <robh+dt@kernel.org>
To: linux-media@vger.kernel.org
Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
[Sakari Ailus: Always call pm_runtime_get_and_resume in pre_streamon]
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
---
 include/uapi/linux/v4l2-controls.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index c8e0f84d204d..92576ed03fc4 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -219,6 +219,12 @@ enum v4l2_colorfx {
  */
 #define V4L2_CID_USER_ALLEGRO_BASE		(V4L2_CID_USER_BASE + 0x1170)
 
+/*
+ * The base for the isl7998x driver controls.
+ * We reserve 16 controls for this driver.
+ */
+#define V4L2_CID_USER_ISL7998X_BASE		(V4L2_CID_USER_BASE + 0x1180)
+
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
  * and the 'MPEG' part of the define is historical */
-- 
cgit v1.2.3


From 96ba61ee5331eb6e2f4c2baeb994b9ceb01d8266 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sun, 20 Feb 2022 21:46:16 +0100
Subject: media: v4l2-ctrls: Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags

Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags that are needed by
NVIDIA Tegra video decoder. Userspace will have to set these flags in
accordance to the type of a decoded frame.

Reviewed-by: Nicolas Dufresne <nicolas@collabora.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index c8e0f84d204d..e3d48d571062 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -1563,6 +1563,8 @@ struct v4l2_h264_dpb_entry {
 #define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC		0x01
 #define V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC		0x02
 #define V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD	0x04
+#define V4L2_H264_DECODE_PARAM_FLAG_PFRAME		0x08
+#define V4L2_H264_DECODE_PARAM_FLAG_BFRAME		0x10
 
 #define V4L2_CID_STATELESS_H264_DECODE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 7)
 /**
-- 
cgit v1.2.3


From 1092347165cf5ed1453c1f211641a859818a2828 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Wed, 9 Feb 2022 17:03:12 +0100
Subject: media: lirc: remove unused feature LIRC_CAN_SET_REC_DUTY_CYCLE

There is no hardware which can filter input on the duty cycle, so no
driver implements this. On top of that, LIRC_CAN_SET_REC_DUTY_CYCLE
has the same value as LIRC_CAN_MEASURE_CARRIER (0x02000000).

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/lirc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index 21c69a6a100d..23b0f2c8ba81 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -73,7 +73,6 @@
 #define LIRC_CAN_REC_MASK              LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
 
 #define LIRC_CAN_SET_REC_CARRIER       (LIRC_CAN_SET_SEND_CARRIER << 16)
-#define LIRC_CAN_SET_REC_DUTY_CYCLE    (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
 
 #define LIRC_CAN_SET_REC_CARRIER_RANGE    0x80000000
 #define LIRC_CAN_GET_REC_RESOLUTION       0x20000000
-- 
cgit v1.2.3


From 72a74c8f0a0df12c7d7ea012aa70d95152858dea Mon Sep 17 00:00:00 2001
From: Ming Qian <ming.qian@nxp.com>
Date: Thu, 24 Feb 2022 11:10:00 +0800
Subject: media: add nv12m_8l128 and nv12m_10be_8l128 video format.

nv12m_8l128 is 8-bit tiled nv12 format used by amphion decoder.
nv12m_10be_8l128 is 10-bit tiled format used by amphion decoder.
The tile size is 8x128

Signed-off-by: Ming Qian <ming.qian@nxp.com>
Signed-off-by: Shijie Qin <shijie.qin@nxp.com>
Signed-off-by: Zhou Peng <eagle.zhou@nxp.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index df8b9c486ba1..3768a0a80830 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -632,6 +632,8 @@ struct v4l2_pix_format {
 /* Tiled YUV formats, non contiguous planes */
 #define V4L2_PIX_FMT_NV12MT  v4l2_fourcc('T', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 64x32 tiles */
 #define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 16x16 tiles */
+#define V4L2_PIX_FMT_NV12M_8L128      v4l2_fourcc('N', 'A', '1', '2') /* Y/CbCr 4:2:0 8x128 tiles */
+#define V4L2_PIX_FMT_NV12M_10BE_8L128 v4l2_fourcc_be('N', 'T', '1', '2') /* Y/CbCr 4:2:0 10-bit 8x128 tiles */
 
 /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */
 #define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B', 'A', '8', '1') /*  8  BGBG.. GRGR.. */
-- 
cgit v1.2.3


From d045b9eb95a9b611c483897a69e7285aefdc66d7 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 7 Mar 2022 12:44:36 -0800
Subject: mptcp: introduce implicit endpoints

In some edge scenarios, an MPTCP subflows can use a local address
mapped by a "implicit" endpoint created by the in-kernel path manager.

Such endpoints presence can be confusing, as it's creation is hard
to track and will prevent the later endpoint creation from the user-space
using the same address.

Define a new endpoint flag to mark implicit endpoints and allow the
user-space to replace implicit them with user-provided data at endpoint
creation time.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mptcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index f106a3941cdf..9690efedb5fa 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -81,6 +81,7 @@ enum {
 #define MPTCP_PM_ADDR_FLAG_SUBFLOW			(1 << 1)
 #define MPTCP_PM_ADDR_FLAG_BACKUP			(1 << 2)
 #define MPTCP_PM_ADDR_FLAG_FULLMESH			(1 << 3)
+#define MPTCP_PM_ADDR_FLAG_IMPLICIT			(1 << 4)
 
 enum {
 	MPTCP_PM_CMD_UNSPEC,
-- 
cgit v1.2.3


From b530e9e1063ed2b817eae7eec6ed2daa8be11608 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Mar 2022 11:53:42 +0100
Subject: bpf: Add "live packet" mode for XDP in BPF_PROG_RUN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds support for running XDP programs through BPF_PROG_RUN in a mode
that enables live packet processing of the resulting frames. Previous uses
of BPF_PROG_RUN for XDP returned the XDP program return code and the
modified packet data to userspace, which is useful for unit testing of XDP
programs.

The existing BPF_PROG_RUN for XDP allows userspace to set the ingress
ifindex and RXQ number as part of the context object being passed to the
kernel. This patch reuses that code, but adds a new mode with different
semantics, which can be selected with the new BPF_F_TEST_XDP_LIVE_FRAMES
flag.

When running BPF_PROG_RUN in this mode, the XDP program return codes will
be honoured: returning XDP_PASS will result in the frame being injected
into the networking stack as if it came from the selected networking
interface, while returning XDP_TX and XDP_REDIRECT will result in the frame
being transmitted out that interface. XDP_TX is translated into an
XDP_REDIRECT operation to the same interface, since the real XDP_TX action
is only possible from within the network drivers themselves, not from the
process context where BPF_PROG_RUN is executed.

Internally, this new mode of operation creates a page pool instance while
setting up the test run, and feeds pages from that into the XDP program.
The setup cost of this is amortised over the number of repetitions
specified by userspace.

To support the performance testing use case, we further optimise the setup
step so that all pages in the pool are pre-initialised with the packet
data, and pre-computed context and xdp_frame objects stored at the start of
each page. This makes it possible to entirely avoid touching the page
content on each XDP program invocation, and enables sending up to 9
Mpps/core on my test box.

Because the data pages are recycled by the page pool, and the test runner
doesn't re-initialise them for each run, subsequent invocations of the XDP
program will see the packet data in the state it was after the last time it
ran on that particular page. This means that an XDP program that modifies
the packet before redirecting it has to be careful about which assumptions
it makes about the packet content, but that is only an issue for the most
naively written programs.

Enabling the new flag is only allowed when not setting ctx_out and data_out
in the test specification, since using it means frames will be redirected
somewhere else, so they can't be returned.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220309105346.100053-2-toke@redhat.com
---
 include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4eebea830613..bc23020b638d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1232,6 +1232,8 @@ enum {
 
 /* If set, run the test on the cpu specified by bpf_attr.test.cpu */
 #define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
+/* If set, XDP frames will be transmitted after processing */
+#define BPF_F_TEST_XDP_LIVE_FRAMES	(1U << 1)
 
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
@@ -1393,6 +1395,7 @@ union bpf_attr {
 		__aligned_u64	ctx_out;
 		__u32		flags;
 		__u32		cpu;
+		__u32		batch_size;
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
-- 
cgit v1.2.3


From 530e0d46c61314c59ecfdb8d3bcb87edbc0f85d3 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 9 Mar 2022 13:04:13 +0100
Subject: can: isotp: set default value for N_As to 50 micro seconds

The N_As value describes the time a CAN frame needs on the wire when
transmitted by the CAN controller. Even very short CAN FD frames need
arround 100 usecs (bitrate 1Mbit/s, data bitrate 8Mbit/s).

Having N_As to be zero (the former default) leads to 'no CAN frame
separation' when STmin is set to zero by the receiving node. This 'burst
mode' should not be enabled by default as it could potentially dump a high
number of CAN frames into the netdev queue from the soft hrtimer context.
This does not affect the system stability but is just not nice and
cooperative.

With this N_As/frame_txtime value the 'burst mode' is disabled by default.

As user space applications usually do not set the frame_txtime element
of struct can_isotp_options the new in-kernel default is very likely
overwritten with zero when the sockopt() CAN_ISOTP_OPTS is invoked.
To make sure that a N_As value of zero is only set intentional the
value '0' is now interpreted as 'do not change the current value'.
When a frame_txtime of zero is required for testing purposes this
CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime.

Link: https://lore.kernel.org/all/20220309120416.83514-2-socketcan@hartkopp.net
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index c55935b64ccc..590f8aea2b6d 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -137,20 +137,16 @@ struct can_isotp_ll_options {
 #define CAN_ISOTP_WAIT_TX_DONE	0x400	/* wait for tx completion */
 #define CAN_ISOTP_SF_BROADCAST	0x800	/* 1-to-N functional addressing */
 
-/* default values */
+/* protocol machine default values */
 
 #define CAN_ISOTP_DEFAULT_FLAGS		0
 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS	0x00
 #define CAN_ISOTP_DEFAULT_PAD_CONTENT	0xCC /* prevent bit-stuffing */
-#define CAN_ISOTP_DEFAULT_FRAME_TXTIME	0
+#define CAN_ISOTP_DEFAULT_FRAME_TXTIME	50000 /* 50 micro seconds */
 #define CAN_ISOTP_DEFAULT_RECV_BS	0
 #define CAN_ISOTP_DEFAULT_RECV_STMIN	0x00
 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX	0
 
-#define CAN_ISOTP_DEFAULT_LL_MTU	CAN_MTU
-#define CAN_ISOTP_DEFAULT_LL_TX_DL	CAN_MAX_DLEN
-#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS	0
-
 /*
  * Remark on CAN_ISOTP_DEFAULT_RECV_* values:
  *
@@ -162,4 +158,24 @@ struct can_isotp_ll_options {
  * consistency and copied directly into the flow control (FC) frame.
  */
 
+/* link layer default values => make use of Classical CAN frames */
+
+#define CAN_ISOTP_DEFAULT_LL_MTU	CAN_MTU
+#define CAN_ISOTP_DEFAULT_LL_TX_DL	CAN_MAX_DLEN
+#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS	0
+
+/*
+ * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as
+ * it only makes sense for isotp implementation tests to run without
+ * a N_As value. As user space applications usually do not set the
+ * frame_txtime element of struct can_isotp_options the new in-kernel
+ * default is very likely overwritten with zero when the sockopt()
+ * CAN_ISOTP_OPTS is invoked.
+ * To make sure that a N_As value of zero is only set intentional the
+ * value '0' is now interpreted as 'do not change the current value'.
+ * When a frame_txtime of zero is required for testing purposes this
+ * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime.
+ */
+#define CAN_ISOTP_FRAME_TXTIME_ZERO	0xFFFFFFFF
+
 #endif /* !_UAPI_CAN_ISOTP_H */
-- 
cgit v1.2.3


From e7a6c00dc77aedf27a601738ea509f1caea6d673 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 4 Mar 2022 08:22:22 -0700
Subject: io_uring: add support for registering ring file descriptors

Lots of workloads use multiple threads, in which case the file table is
shared between them. This makes getting and putting the ring file
descriptor for each io_uring_enter(2) system call more expensive, as it
involves an atomic get and put for each call.

Similarly to how we allow registering normal file descriptors to avoid
this overhead, add support for an io_uring_register(2) API that allows
to register the ring fds themselves:

1) IORING_REGISTER_RING_FDS - takes an array of io_uring_rsrc_update
   structs, and registers them with the task.
2) IORING_UNREGISTER_RING_FDS - takes an array of io_uring_src_update
   structs, and unregisters them.

When a ring fd is registered, it is internally represented by an offset.
This offset is returned to the application, and the application then
uses this offset and sets IORING_ENTER_REGISTERED_RING for the
io_uring_enter(2) system call. This works just like using a registered
file descriptor, rather than a real one, in an SQE, where
IOSQE_FIXED_FILE gets set to tell io_uring that we're using an internal
offset/descriptor rather than a real file descriptor.

In initial testing, this provides a nice bump in performance for
threaded applications in real world cases where the batch count (eg
number of requests submitted per io_uring_enter(2) invocation) is low.
In a microbenchmark, submitting NOP requests, we see the following
increases in performance:

Requests per syscall	Baseline	Registered	Increase
----------------------------------------------------------------
1			 ~7030K		 ~8080K		+15%
2			~13120K		~14800K		+13%
4			~22740K		~25300K		+11%

Co-developed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2a..42b2fe84dbcd 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -257,10 +257,11 @@ struct io_cqring_offsets {
 /*
  * io_uring_enter(2) flags
  */
-#define IORING_ENTER_GETEVENTS	(1U << 0)
-#define IORING_ENTER_SQ_WAKEUP	(1U << 1)
-#define IORING_ENTER_SQ_WAIT	(1U << 2)
-#define IORING_ENTER_EXT_ARG	(1U << 3)
+#define IORING_ENTER_GETEVENTS		(1U << 0)
+#define IORING_ENTER_SQ_WAKEUP		(1U << 1)
+#define IORING_ENTER_SQ_WAIT		(1U << 2)
+#define IORING_ENTER_EXT_ARG		(1U << 3)
+#define IORING_ENTER_REGISTERED_RING	(1U << 4)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -325,6 +326,10 @@ enum {
 	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
 
+	/* register/unregister io_uring fd with the ring */
+	IORING_REGISTER_RING_FDS		= 20,
+	IORING_UNREGISTER_RING_FDS		= 21,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
-- 
cgit v1.2.3


From 4f57f06ce2186c31c3da52386125dc57b1cd6f96 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Mar 2022 06:27:26 -0700
Subject: io_uring: add support for IORING_OP_MSG_RING command

This adds support for IORING_OP_MSG_RING, which allows an SQE to signal
another ring. That allows either waking up someone waiting on the ring,
or even passing a 64-bit value via the user_data field in the CQE.

sqe->fd must contain the fd of a ring that should receive the CQE.
sqe->off will be propagated to the cqe->user_data on the target ring,
and sqe->len will be propagated to cqe->res. The results CQE will have
IORING_CQE_F_MSG set in its flags, to indicate that this CQE was generated
from a messaging request rather than a SQE issued locally on that ring.
This effectively allows passing a 64-bit and a 32-bit quantify between
the two rings.

This request type has the following request specific error cases:

- -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is
  of the io_uring type.
- -EOVERFLOW. Set if we were not able to deliver a request to the target
  ring.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 42b2fe84dbcd..8bd4bfdd9a89 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -143,6 +143,7 @@ enum {
 	IORING_OP_MKDIRAT,
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
+	IORING_OP_MSG_RING,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -199,9 +200,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_MSG	If set, CQE was generated with IORING_OP_MSG_RING
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
 #define IORING_CQE_F_MORE		(1U << 1)
+#define IORING_CQE_F_MSG		(1U << 2)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,
-- 
cgit v1.2.3


From 153474ba1a4aed0a7b797b4c2be8c35c7a4e57bd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 27 Jan 2022 11:46:37 -0600
Subject: ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h

Rename tracehook_report_syscall_{entry,exit} to
ptrace_report_syscall_{entry,exit} and place them in ptrace.h

There is no longer any generic tracehook infractructure so make
these ptrace specific functions ptrace specific.

Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/20220309162454.123006-3-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/uapi/linux/ptrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 3747bf816f9a..b7af92e07d1f 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -114,7 +114,7 @@ struct ptrace_rseq_configuration {
 
 /*
  * These values are stored in task->ptrace_message
- * by tracehook_report_syscall_* to describe the current syscall-stop.
+ * by ptrace_report_syscall_* to describe the current syscall-stop.
  */
 #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
 #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
-- 
cgit v1.2.3


From bcbb7bf6ccde7cb969a5642879832bc84ebf06a3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Mar 2022 12:59:35 -0700
Subject: io_uring: allow submissions to continue on error

By default, io_uring will stop submitting a batch of requests if we run
into an error submitting a request. This isn't strictly necessary, as
the error result is passed out-of-band via a CQE anyway. And it can be
a bit confusing for some applications.

Provide a way to setup a ring that will continue submitting on error,
when the error CQE has been posted.

There's still one case that will break out of submission. If we fail
allocating a request, then we'll still return -ENOMEM. We could in theory
post a CQE for that condition too even if we never got a request. Leave
that for a potential followup.

Reported-by: Dylan Yudaken <dylany@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8bd4bfdd9a89..d2be4eb22008 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,6 +101,7 @@ enum {
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
 
 enum {
 	IORING_OP_NOP,
-- 
cgit v1.2.3


From 9bb984f28d5bcb917d35d930fcfb89f90f9449fd Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 9 Mar 2022 01:05:09 -0800
Subject: bpf: Remove BPF_SKB_DELIVERY_TIME_NONE and rename
 s/delivery_time_/tstamp_/

This patch is to simplify the uapi bpf.h regarding to the tstamp type
and use a similar way as the kernel to describe the value stored
in __sk_buff->tstamp.

My earlier thought was to avoid describing the semantic and
clock base for the rcv timestamp until there is more clarity
on the use case, so the __sk_buff->delivery_time_type naming instead
of __sk_buff->tstamp_type.

With some thoughts, it can reuse the UNSPEC naming.  This patch first
removes BPF_SKB_DELIVERY_TIME_NONE and also

rename BPF_SKB_DELIVERY_TIME_UNSPEC to BPF_SKB_TSTAMP_UNSPEC
and    BPF_SKB_DELIVERY_TIME_MONO   to BPF_SKB_TSTAMP_DELIVERY_MONO.

The semantic of BPF_SKB_TSTAMP_DELIVERY_MONO is the same:
__sk_buff->tstamp has delivery time in mono clock base.

BPF_SKB_TSTAMP_UNSPEC means __sk_buff->tstamp has the (rcv)
tstamp at ingress and the delivery time at egress.  At egress,
the clock base could be found from skb->sk->sk_clockid.
__sk_buff->tstamp == 0 naturally means NONE, so NONE is not needed.

With BPF_SKB_TSTAMP_UNSPEC for the rcv tstamp at ingress,
the __sk_buff->delivery_time_type is also renamed to __sk_buff->tstamp_type
which was also suggested in the earlier discussion:
https://lore.kernel.org/bpf/b181acbe-caf8-502d-4b7b-7d96b9fc5d55@iogearbox.net/

The above will then make __sk_buff->tstamp and __sk_buff->tstamp_type
the same as its kernel skb->tstamp and skb->mono_delivery_time
counter part.

The internal kernel function bpf_skb_convert_dtime_type_read() is then
renamed to bpf_skb_convert_tstamp_type_read() and it can be simplified
with the BPF_SKB_DELIVERY_TIME_NONE gone.  A BPF_ALU32_IMM(BPF_AND)
insn is also saved by using BPF_JMP32_IMM(BPF_JSET).

The bpf helper bpf_skb_set_delivery_time() is also renamed to
bpf_skb_set_tstamp().  The arg name is changed from dtime
to tstamp also.  It only allows setting tstamp 0 for
BPF_SKB_TSTAMP_UNSPEC and it could be relaxed later
if there is use case to change mono delivery time to
non mono.

prog->delivery_time_access is also renamed to prog->tstamp_type_access.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220309090509.3712315-1-kafai@fb.com
---
 include/uapi/linux/bpf.h | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bc23020b638d..d288a0a9f797 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5090,23 +5090,22 @@ union bpf_attr {
  *		0 on success, or a negative error in case of failure. On error
  *		*dst* buffer is zeroed out.
  *
- * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type)
+ * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type)
  *	Description
- *		Set a *dtime* (delivery time) to the __sk_buff->tstamp and also
- *		change the __sk_buff->delivery_time_type to *dtime_type*.
+ *		Change the __sk_buff->tstamp_type to *tstamp_type*
+ *		and set *tstamp* to the __sk_buff->tstamp together.
  *
- *		When setting a delivery time (non zero *dtime*) to
- *		__sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type*
- *		is supported.  It is the only delivery_time_type that will be
- *		kept after bpf_redirect_*().
- *
- *		If there is no need to change the __sk_buff->delivery_time_type,
- *		the delivery time can be directly written to __sk_buff->tstamp
+ *		If there is no need to change the __sk_buff->tstamp_type,
+ *		the tstamp value can be directly written to __sk_buff->tstamp
  *		instead.
  *
- *		*dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE
- *		can be used to clear any delivery time stored in
- *		__sk_buff->tstamp.
+ *		BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ *		will be kept during bpf_redirect_*().  A non zero
+ *		*tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ *		*tstamp_type*.
+ *
+ *		A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ *		with a zero *tstamp*.
  *
  *		Only IPv4 and IPv6 skb->protocol are supported.
  *
@@ -5119,7 +5118,7 @@ union bpf_attr {
  *	Return
  *		0 on success.
  *		**-EINVAL** for invalid input
- *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *		**-EOPNOTSUPP** for unsupported protocol
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5314,7 +5313,7 @@ union bpf_attr {
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
-	FN(skb_set_delivery_time),      \
+	FN(skb_set_tstamp),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5505,9 +5504,12 @@ union {					\
 } __attribute__((aligned(8)))
 
 enum {
-	BPF_SKB_DELIVERY_TIME_NONE,
-	BPF_SKB_DELIVERY_TIME_UNSPEC,
-	BPF_SKB_DELIVERY_TIME_MONO,
+	BPF_SKB_TSTAMP_UNSPEC,
+	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* tstamp has mono delivery time */
+	/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
+	 * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
+	 * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+	 */
 };
 
 /* user accessible mirror of in-kernel sk_buff.
@@ -5550,7 +5552,7 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
-	__u8  delivery_time_type;
+	__u8  tstamp_type;
 	__u32 :24;		/* Padding, future use. */
 	__u64 hwtstamp;
 };
-- 
cgit v1.2.3


From 58617014405ad5c9f94f464444f4972dabb71ca7 Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Thu, 10 Mar 2022 23:53:35 +0800
Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup()

Fix the descriptions of the return values of helper bpf_current_task_under_cgroup().

Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)")
Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220310155335.1278783-1-hengqi.chen@gmail.com
---
 include/uapi/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d288a0a9f797..e9978a916c3e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2302,8 +2302,8 @@ union bpf_attr {
  * 	Return
  * 		The return value depends on the result of the test, and can be:
  *
- *		* 0, if current task belongs to the cgroup2.
- *		* 1, if current task does not belong to the cgroup2.
+ *		* 1, if current task belongs to the cgroup2.
+ *		* 0, if current task does not belong to the cgroup2.
  * 		* A negative error code, if an error occurred.
  *
  * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
-- 
cgit v1.2.3


From 174b16946e39ebd369097e0f773536c91a8c1a4c Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 2 Mar 2022 12:13:58 +0100
Subject: bpf-lsm: Introduce new helper bpf_ima_file_hash()

ima_file_hash() has been modified to calculate the measurement of a file on
demand, if it has not been already performed by IMA or the measurement is
not fresh. For compatibility reasons, ima_inode_hash() remains unchanged.

Keep the same approach in eBPF and introduce the new helper
bpf_ima_file_hash() to take advantage of the modified behavior of
ima_file_hash().

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220302111404.193900-4-roberto.sassu@huawei.com
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e9978a916c3e..99fab54ae9c0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5119,6 +5119,16 @@ union bpf_attr {
  *		0 on success.
  *		**-EINVAL** for invalid input
  *		**-EOPNOTSUPP** for unsupported protocol
+ *
+ * long bpf_ima_file_hash(struct file *file, void *dst, u32 size)
+ *	Description
+ *		Returns a calculated IMA hash of the *file*.
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ *		invalid arguments are passed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5314,6 +5324,7 @@ union bpf_attr {
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
 	FN(skb_set_tstamp),		\
+	FN(ima_file_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Mon Sep 17 00:00:00 2001
From: Ilya Maximets <i.maximets@ovn.org>
Date: Wed, 9 Mar 2022 23:20:33 +0100
Subject: net: openvswitch: fix uAPI incompatibility with existing user space

Few years ago OVS user space made a strange choice in the commit [1]
to define types only valid for the user space inside the copy of a
kernel uAPI header.  '#ifndef __KERNEL__' and another attribute was
added later.

This leads to the inevitable clash between user space and kernel types
when the kernel uAPI is extended.  The issue was unveiled with the
addition of a new type for IPv6 extension header in kernel uAPI.

When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the
older user space application, application tries to parse it as
OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as
malformed.  Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with
every IPv6 packet that goes to the user space, IPv6 support is fully
broken.

Fixing that by bringing these user space attributes to the kernel
uAPI to avoid the clash.  Strictly speaking this is not the problem
of the kernel uAPI, but changing it is the only way to avoid breakage
of the older user space applications at this point.

These 2 types are explicitly rejected now since they should not be
passed to the kernel.  Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved
out from the '#ifdef __KERNEL__' as there is no good reason to hide
it from the userspace.  And it's also explicitly rejected now, because
it's for in-kernel use only.

Comments with warnings were added to avoid the problem coming back.

(1 << type) converted to (1ULL << type) to avoid integer overflow on
OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now.

 [1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline")

Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support")
Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com
Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c
Reported-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Aaron Conole <aconole@redhat.com>
Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/openvswitch.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 9d1710f20505..ce3e1738d427 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -351,11 +351,21 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
 	OVS_KEY_ATTR_NSH,       /* Nested set of ovs_nsh_key_* */
-	OVS_KEY_ATTR_IPV6_EXTHDRS,  /* struct ovs_key_ipv6_exthdr */
 
-#ifdef __KERNEL__
-	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
-#endif
+	/* User space decided to squat on types 29 and 30.  They are defined
+	 * below, but should not be sent to the kernel.
+	 *
+	 * WARNING: No new types should be added unless they are defined
+	 *          for both kernel and user space (no 'ifdef's).  It's hard
+	 *          to keep compatibility otherwise.
+	 */
+	OVS_KEY_ATTR_PACKET_TYPE,   /* be32 packet type */
+	OVS_KEY_ATTR_ND_EXTENSIONS, /* IPv6 Neighbor Discovery extensions */
+
+	OVS_KEY_ATTR_TUNNEL_INFO,   /* struct ip_tunnel_info.
+				     * For in-kernel use only.
+				     */
+	OVS_KEY_ATTR_IPV6_EXTHDRS,  /* struct ovs_key_ipv6_exthdr */
 	__OVS_KEY_ATTR_MAX
 };
 
-- 
cgit v1.2.3


From 2916b7a9c7c25ecf9be2f37e567a277e861f8e3f Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Tue, 22 Feb 2022 20:36:39 +0530
Subject: nl80211: fix typo of NL80211_IF_TYPE_OCB in documentation

It should be NL80211_IFTYPE_OCB instead.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/1645542399-4680-1-git-send-email-quic_vjakkam@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 98ed52663d6b..0568a79097b8 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3259,7 +3259,7 @@ enum nl80211_attrs {
  *	and therefore can't be created in the normal ways, use the
  *	%NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE
  *	commands to create and destroy one
- * @NL80211_IF_TYPE_OCB: Outside Context of a BSS
+ * @NL80211_IFTYPE_OCB: Outside Context of a BSS
  *	This mode corresponds to the MIB variable dot11OCBActivated=true
  * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev)
  * @NL80211_IFTYPE_MAX: highest interface type number currently defined
-- 
cgit v1.2.3


From b20dc3c684580ddc07eb48ee3c3dc7597cd5eebf Mon Sep 17 00:00:00 2001
From: Wojciech Drewek <wojciech.drewek@intel.com>
Date: Fri, 4 Mar 2022 17:40:42 +0100
Subject: gtp: Allow to create GTP device without FDs

Currently, when the user wants to create GTP device, he has to
provide file handles to the sockets created in userspace (IFLA_GTP_FD0,
IFLA_GTP_FD1). This behaviour is not ideal, considering the option of
adding support for GTP device creation through ip link. Ip link
application is not a good place to create such sockets.

This patch allows to create GTP device without providing
IFLA_GTP_FD0 and IFLA_GTP_FD1 arguments. If the user sets
IFLA_GTP_CREATE_SOCKETS attribute, then GTP module takes care
of creating UDP sockets by itself. Sockets are created with the
commonly known UDP ports used for GTP protocol (GTP0_PORT and
GTP1U_PORT). In this case we don't have to provide encap_destroy
because no extra deinitialization is needed, everything is covered
by udp_tunnel_sock_release.

Note: GTP instance created with only this change applied, does
not handle GTP Echo Requests. This is implemented in the following
patch.

Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ddca20357e7e..ebd2aa3ef809 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -887,6 +887,7 @@ enum {
 	IFLA_GTP_FD1,
 	IFLA_GTP_PDP_HASHSIZE,
 	IFLA_GTP_ROLE,
+	IFLA_GTP_CREATE_SOCKETS,
 	__IFLA_GTP_MAX,
 };
 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
-- 
cgit v1.2.3


From 9af41cc33471ea1efa6f77e188f055cc77d0a5c5 Mon Sep 17 00:00:00 2001
From: Wojciech Drewek <wojciech.drewek@intel.com>
Date: Fri, 4 Mar 2022 17:40:43 +0100
Subject: gtp: Implement GTP echo response

Adding GTP device through ip link creates the situation where
there is no userspace daemon which would handle GTP messages
(Echo Request for example). GTP-U instance which would not respond
to echo requests would violate GTP specification.

When GTP packet arrives with GTP_ECHO_REQ message type,
GTP_ECHO_RSP is send to the sender. GTP_ECHO_RSP message
should contain information element with GTPIE_RECOVERY tag and
restart counter value. For GTPv1 restart counter is not used
and should be equal to 0, for GTPv0 restart counter contains
information provided from userspace(IFLA_GTP_RESTART_COUNT).

Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
Suggested-by: Harald Welte <laforge@gnumonks.org>
Reviewed-by: Harald Welte <laforge@gnumonks.org>
Tested-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ebd2aa3ef809..bd24c7dc10a2 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -888,6 +888,7 @@ enum {
 	IFLA_GTP_PDP_HASHSIZE,
 	IFLA_GTP_ROLE,
 	IFLA_GTP_CREATE_SOCKETS,
+	IFLA_GTP_RESTART_COUNT,
 	__IFLA_GTP_MAX,
 };
 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
-- 
cgit v1.2.3


From d33bd757d362699cfce3c68b53cd12b947d196f4 Mon Sep 17 00:00:00 2001
From: Wojciech Drewek <wojciech.drewek@intel.com>
Date: Fri, 4 Mar 2022 17:40:44 +0100
Subject: gtp: Implement GTP echo request

Adding GTP device through ip link creates the situation where
GTP instance is not able to send GTP echo requests.
Echo requests are used to check if GTP peer is still alive.
With this patch, gtp_genl_ops are extended by new cmd (GTP_CMD_ECHOREQ)
which allows to send echo request in the given version of GTP
protocol (v0 or v1), from the given ms address to he given
peer. TID is not inclued because in all path management
messages it should be equal to 0.

When GTP echo response is detected, multicast message is
send to everyone in the gtp_genl_family. Message contains
GTP version, ms address and peer address.

Suggested-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/uapi/linux/gtp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index 79f9191bbb24..2f61298a7b77 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -8,6 +8,7 @@ enum gtp_genl_cmds {
 	GTP_CMD_NEWPDP,
 	GTP_CMD_DELPDP,
 	GTP_CMD_GETPDP,
+	GTP_CMD_ECHOREQ,
 
 	GTP_CMD_MAX,
 };
-- 
cgit v1.2.3


From e3acda7ade0a36c5cbebc2b54d30b7f08a4ba29b Mon Sep 17 00:00:00 2001
From: Wojciech Drewek <wojciech.drewek@intel.com>
Date: Fri, 4 Mar 2022 17:40:45 +0100
Subject: net/sched: Allow flower to match on GTP options

Options are as follows: PDU_TYPE:QFI and they refernce to
the fields from the  PDU Session Protocol. PDU Session data
is conveyed in GTP-U Extension Header.

GTP-U Extension Header is described in 3GPP TS 29.281.
PDU Session Protocol is described in 3GPP TS 38.415.

PDU_TYPE -  indicates the type of the PDU Session Information (4 bits)
QFI      -  QoS Flow Identifier (6 bits)

  # ip link add gtp_dev type gtp role sgsn
  # tc qdisc add dev gtp_dev ingress
  # tc filter add dev gtp_dev protocol ip parent ffff: \
      flower \
        enc_key_id 11 \
        gtp_opts 1:8/ff:ff \
      action mirred egress redirect dev eth0

Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/uapi/linux/if_tunnel.h |  4 +++-
 include/uapi/linux/pkt_cls.h   | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 7d9105533c7b..102119628ff5 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -176,8 +176,10 @@ enum {
 #define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
 #define TUNNEL_NOCACHE		__cpu_to_be16(0x2000)
 #define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)
+#define TUNNEL_GTP_OPT		__cpu_to_be16(0x8000)
 
 #define TUNNEL_OPTIONS_PRESENT \
-		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
+		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \
+		TUNNEL_GTP_OPT)
 
 #endif /* _UAPI_IF_TUNNEL_H_ */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index ee38b35c3f57..404f97fb239c 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -616,6 +616,10 @@ enum {
 					 * TCA_FLOWER_KEY_ENC_OPT_ERSPAN_
 					 * attributes
 					 */
+	TCA_FLOWER_KEY_ENC_OPTS_GTP,	/* Nested
+					 * TCA_FLOWER_KEY_ENC_OPT_GTP_
+					 * attributes
+					 */
 	__TCA_FLOWER_KEY_ENC_OPTS_MAX,
 };
 
@@ -654,6 +658,17 @@ enum {
 #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \
 		(__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1)
 
+enum {
+	TCA_FLOWER_KEY_ENC_OPT_GTP_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE,		/* u8 */
+	TCA_FLOWER_KEY_ENC_OPT_GTP_QFI,			/* u8 */
+
+	__TCA_FLOWER_KEY_ENC_OPT_GTP_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GTP_MAX \
+		(__TCA_FLOWER_KEY_ENC_OPT_GTP_MAX - 1)
+
 enum {
 	TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC,
 	TCA_FLOWER_KEY_MPLS_OPTS_LSE,
-- 
cgit v1.2.3


From 3b6c6c039707f6bb7c64af2aa82a437fabb93aee Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 9 Mar 2022 19:49:48 -0800
Subject: nvdimm/region: Delete nd_blk_region infrastructure

Now that the nd_namespace_blk infrastructure is removed, delete all the
region machinery to coordinate provisioning aliased capacity between
PMEM and BLK.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/164688418803.2879318.1302315202397235855.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/uapi/linux/ndctl.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 8cf1e4884fd5..17e02b64ea2e 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -189,7 +189,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_REGION_BLK 3      /* nd_region: (parent of BLK namespaces) */
 #define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5  /* PMEM namespace (may alias with BLK) */
-#define ND_DEVICE_NAMESPACE_BLK 6   /* BLK namespace (may alias with PMEM) */
 #define ND_DEVICE_DAX_PMEM 7        /* Device DAX interface to pmem */
 
 enum nd_driver_flags {
@@ -198,7 +197,6 @@ enum nd_driver_flags {
 	ND_DRIVER_REGION_BLK      = 1 << ND_DEVICE_REGION_BLK,
 	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
-	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
 	ND_DRIVER_DAX_PMEM	  = 1 << ND_DEVICE_DAX_PMEM,
 };
 
-- 
cgit v1.2.3


From 8109517b394e6deab5fd21cc5460e82ffed229c6 Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Date: Mon, 24 Jan 2022 11:25:24 +0100
Subject: rpmsg: ctrl: Introduce new RPMSG_CREATE/RELEASE_DEV_IOCTL controls

Allow the user space application to create and release an rpmsg device
by adding RPMSG_CREATE_DEV_IOCTL and RPMSG_RELEASE_DEV_IOCTL ioctrls to
the /dev/rpmsg_ctrl interface

The RPMSG_CREATE_DEV_IOCTL Ioctl can be used to instantiate a local rpmsg
device.
Depending on the back-end implementation, the associated rpmsg driver is
probed and a NS announcement can be sent to the remote processor.

The RPMSG_RELEASE_DEV_IOCTL allows the user application to release a
rpmsg device created either by the remote processor or with the
RPMSG_CREATE_DEV_IOCTL call.
Depending on the back-end implementation, the associated rpmsg driver is
removed and a NS destroy rpmsg can be sent to the remote processor.

Suggested-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20220124102524.295783-12-arnaud.pouliquen@foss.st.com
---
 include/uapi/linux/rpmsg.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rpmsg.h b/include/uapi/linux/rpmsg.h
index f5ca8740f3fb..1637e68177d9 100644
--- a/include/uapi/linux/rpmsg.h
+++ b/include/uapi/linux/rpmsg.h
@@ -33,4 +33,14 @@ struct rpmsg_endpoint_info {
  */
 #define RPMSG_DESTROY_EPT_IOCTL	_IO(0xb5, 0x2)
 
+/**
+ * Instantiate a new local rpmsg service device.
+ */
+#define RPMSG_CREATE_DEV_IOCTL	_IOW(0xb5, 0x3, struct rpmsg_endpoint_info)
+
+/**
+ * Release a local rpmsg device.
+ */
+#define RPMSG_RELEASE_DEV_IOCTL	_IOW(0xb5, 0x4, struct rpmsg_endpoint_info)
+
 #endif
-- 
cgit v1.2.3


From b5fdf66f6eb2560784c6f60131dc567de06267dc Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Tue, 1 Mar 2022 14:37:27 -0500
Subject: NFS: Remove remaining dfprintks related to fscache and remove
 NFSDBG_FSCACHE

The fscache cookie APIs including fscache_acquire_cookie() and
fscache_relinquish_cookie() now have very good tracing.  Thus,
there is no real need for dfprintks in the NFS fscache interface.

The NFS fscache interface has removed all dfprintks so remove the
NFSDBG_FSCACHE defines.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/uapi/linux/nfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h
index 3afe3767c55d..ae0de165c014 100644
--- a/include/uapi/linux/nfs_fs.h
+++ b/include/uapi/linux/nfs_fs.h
@@ -52,7 +52,7 @@
 #define NFSDBG_CALLBACK		0x0100
 #define NFSDBG_CLIENT		0x0200
 #define NFSDBG_MOUNT		0x0400
-#define NFSDBG_FSCACHE		0x0800
+#define NFSDBG_FSCACHE		0x0800 /* unused */
 #define NFSDBG_PNFS		0x1000
 #define NFSDBG_PNFS_LD		0x2000
 #define NFSDBG_STATE		0x4000
-- 
cgit v1.2.3


From 2c7d2a230237e7c43fa067d695937b7e484bb92a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:39:58 -0500
Subject: btrfs: add definition for EXTENT_TREE_V2

This adds the initial definition of the EXTENT_TREE_V2 incompat feature
flag.  This also hides the support behind CONFIG_BTRFS_DEBUG.

THIS IS A IN DEVELOPMENT FORMAT CHANGE, DO NOT USE UNLESS YOU ARE A
DEVELOPER OR A TESTER.

The format is in flux and will be added in stages, any fs will need to
be re-made between updates to the format.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 738619994e26..1cb1a3860f1d 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
 #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
+#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
 
 struct btrfs_ioctl_feature_flags {
 	__u64 compat_flags;
-- 
cgit v1.2.3


From 9c54e80ddc6bd89596a4046d451908700476fd14 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:07 -0500
Subject: btrfs: add code to support the block group root

This code adds the on disk structures for the block group root, which
will hold the block group items for extent tree v2.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs_tree.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 5416f1f1a77a..b069752a8ecf 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -53,6 +53,9 @@
 /* tracks free space in block groups. */
 #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
 
+/* Holds the block group items for extent tree v2. */
+#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
+
 /* device stats in the device tree */
 #define BTRFS_DEV_STATS_OBJECTID 0ULL
 
-- 
cgit v1.2.3


From dcb77a9ae87dc1ae2c54ea2e629da357e694b664 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 16 Aug 2021 15:58:29 -0700
Subject: btrfs: add definitions and documentation for encoded I/O ioctls

In order to allow sending and receiving compressed data without
decompressing it, we need an interface to write pre-compressed data
directly to the filesystem and the matching interface to read compressed
data without decompressing it. This adds the definitions for ioctls to
do that and detailed explanations of how to use them.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 1cb1a3860f1d..d956b2993970 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -869,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
 		__u8 align[7];
 };
 
+/*
+ * Data and metadata for an encoded read or write.
+ *
+ * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
+ * compression). This can be used to read the compressed contents of a file or
+ * write pre-compressed data directly to a file.
+ *
+ * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
+ * preadv/pwritev with additional metadata about how the data is encoded and the
+ * size of the unencoded data.
+ *
+ * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
+ * the metadata fields, and returns the size of the encoded data. It reads one
+ * extent per call. It can also read data which is not encoded.
+ *
+ * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
+ * from the iovecs, and returns the size of the encoded data. Note that the
+ * encoded data is not validated when it is written; if it is not valid (e.g.,
+ * it cannot be decompressed), then a subsequent read may return an error.
+ *
+ * Since the filesystem page cache contains decoded data, encoded I/O bypasses
+ * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
+ */
+struct btrfs_ioctl_encoded_io_args {
+	/* Input parameters for both reads and writes. */
+
+	/*
+	 * iovecs containing encoded data.
+	 *
+	 * For reads, if the size of the encoded data is larger than the sum of
+	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
+	 * ENOBUFS.
+	 *
+	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
+	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
+	 * increase in the future). This must also be less than or equal to
+	 * unencoded_len.
+	 */
+	const struct iovec __user *iov;
+	/* Number of iovecs. */
+	unsigned long iovcnt;
+	/*
+	 * Offset in file.
+	 *
+	 * For writes, must be aligned to the sector size of the filesystem.
+	 */
+	__s64 offset;
+	/* Currently must be zero. */
+	__u64 flags;
+
+	/*
+	 * For reads, the following members are output parameters that will
+	 * contain the returned metadata for the encoded data.
+	 * For writes, the following members must be set to the metadata for the
+	 * encoded data.
+	 */
+
+	/*
+	 * Length of the data in the file.
+	 *
+	 * Must be less than or equal to unencoded_len - unencoded_offset. For
+	 * writes, must be aligned to the sector size of the filesystem unless
+	 * the data ends at or beyond the current end of the file.
+	 */
+	__u64 len;
+	/*
+	 * Length of the unencoded (i.e., decrypted and decompressed) data.
+	 *
+	 * For writes, must be no more than 128 KiB (this limit may increase in
+	 * the future). If the unencoded data is actually longer than
+	 * unencoded_len, then it is truncated; if it is shorter, then it is
+	 * extended with zeroes.
+	 */
+	__u64 unencoded_len;
+	/*
+	 * Offset from the first byte of the unencoded data to the first byte of
+	 * logical data in the file.
+	 *
+	 * Must be less than unencoded_len.
+	 */
+	__u64 unencoded_offset;
+	/*
+	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
+	 *
+	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
+	 */
+	__u32 compression;
+	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
+	__u32 encryption;
+	/*
+	 * Reserved for future expansion.
+	 *
+	 * For reads, always returned as zero. Users should check for non-zero
+	 * bytes. If there are any, then the kernel has a newer version of this
+	 * structure with additional information that the user definition is
+	 * missing.
+	 *
+	 * For writes, must be zeroed.
+	 */
+	__u8 reserved[64];
+};
+
+/* Data is not compressed. */
+#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
+/* Data is compressed as a single zlib stream. */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
+/*
+ * Data is compressed as a single zstd frame with the windowLog compression
+ * parameter set to no more than 17.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
+/*
+ * Data is compressed sector by sector (using the sector size indicated by the
+ * name of the constant) with LZO1X and wrapped in the format documented in
+ * fs/btrfs/lzo.c. For writes, the compression sector size must match the
+ * filesystem sector size.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
+#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
+
+/* Data is not encrypted. */
+#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
+#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -997,5 +1125,9 @@ enum btrfs_err_code {
 				struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
 				struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				    struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
+				     struct btrfs_ioctl_encoded_io_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
-- 
cgit v1.2.3


From 65722ff6181aa52c3d5b0929004af22a3a63e148 Mon Sep 17 00:00:00 2001
From: David Yat Sin <david.yatsin@amd.com>
Date: Tue, 8 Mar 2022 14:00:50 -0500
Subject: drm/amdkfd: CRIU export dmabuf handles for GTT BOs

Export dmabuf handles for GTT BOs so that their contents can be accessed
using SDMA during checkpoint/restore.

v2: Squash in fix from David to set dmabuf handle to invalid for BOs
that cannot be accessed using SDMA during checkpoint/restore.

Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Reviewed-by : Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index b40687bf1014..42975e940758 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -33,9 +33,10 @@
  * - 1.5 - Add SVM API
  * - 1.6 - Query clear flags in SVM get_attr API
  * - 1.7 - Checkpoint Restore (CRIU) API
+ * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 7
+#define KFD_IOCTL_MINOR_VERSION 8
 
 struct kfd_ioctl_get_version_args {
 	__u32 major_version;	/* from KFD */
@@ -195,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args {
 	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
+#define KFD_INVALID_FD     0xffffffff
+
 /* Matching HSA_EVENTTYPE */
 #define KFD_IOC_EVENT_SIGNAL			0
 #define KFD_IOC_EVENT_NODECHANGE		1
-- 
cgit v1.2.3


From 435fe1c0c1f74b682dba85641406abf4337aade6 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Wed, 16 Mar 2022 08:15:57 +0200
Subject: net: geneve: support IPv4/IPv6 as inner protocol

This patch adds support for encapsulating IPv4/IPv6 within GENEVE.

In order to use this, a new IFLA_GENEVE_INNER_PROTO_INHERIT flag needs
to be provided at device creation. This property cannot be changed for
the time being.

In case IP traffic is received on a non-tun device the drop count is
increased.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Link: https://lore.kernel.org/r/20220316061557.431872-1-eyal.birger@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bd24c7dc10a2..cc284c048e69 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -842,6 +842,7 @@ enum {
 	IFLA_GENEVE_LABEL,
 	IFLA_GENEVE_TTL_INHERIT,
 	IFLA_GENEVE_DF,
+	IFLA_GENEVE_INNER_PROTO_INHERIT,
 	__IFLA_GENEVE_MAX
 };
 #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
-- 
cgit v1.2.3


From ec7328b59176227216c461601c6bd0e922232a9b Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 16 Mar 2022 16:08:43 +0100
Subject: net: bridge: mst: Multiple Spanning Tree (MST) mode

Allow the user to switch from the current per-VLAN STP mode to an MST
mode.

Up to this point, per-VLAN STP states where always isolated from each
other. This is in contrast to the MSTP standard (802.1Q-2018, Clause
13.5), where VLANs are grouped into MST instances (MSTIs), and the
state is managed on a per-MSTI level, rather that at the per-VLAN
level.

Perhaps due to the prevalence of the standard, many switching ASICs
are built after the same model. Therefore, add a corresponding MST
mode to the bridge, which we can later add offloading support for in a
straight-forward way.

For now, all VLANs are fixed to MSTI 0, also called the Common
Spanning Tree (CST). That is, all VLANs will follow the port-global
state.

Upcoming changes will make this actually useful by allowing VLANs to
be mapped to arbitrary MSTIs and allow individual MSTI states to be
changed.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 2711c3522010..30a242195ced 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -759,6 +759,7 @@ struct br_mcast_stats {
 enum br_boolopt_id {
 	BR_BOOLOPT_NO_LL_LEARN,
 	BR_BOOLOPT_MCAST_VLAN_SNOOPING,
+	BR_BOOLOPT_MST_ENABLE,
 	BR_BOOLOPT_MAX
 };
 
-- 
cgit v1.2.3


From 8c678d60562f3e5f6d0a5f5465e27930ffedb8ca Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 16 Mar 2022 16:08:44 +0100
Subject: net: bridge: mst: Allow changing a VLAN's MSTI

Allow a VLAN to move out of the CST (MSTI 0), to an independent tree.

The user manages the VID to MSTI mappings via a global VLAN
setting. The proposed iproute2 interface would be:

    bridge vlan global set dev br0 vid <VID> msti <MSTI>

Changing the state in non-zero MSTIs is still not supported, but will
be addressed in upcoming changes.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 30a242195ced..f60244b747ae 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -564,6 +564,7 @@ enum {
 	BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
 	BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS,
 	BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE,
+	BRIDGE_VLANDB_GOPTS_MSTI,
 	__BRIDGE_VLANDB_GOPTS_MAX
 };
 #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1)
-- 
cgit v1.2.3


From 122c29486e1ff78033c45d0d31c710e7dc8945a5 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 16 Mar 2022 16:08:45 +0100
Subject: net: bridge: mst: Support setting and reporting MST port states

Make it possible to change the port state in a given MSTI by extending
the bridge port netlink interface (RTM_SETLINK on PF_BRIDGE).The
proposed iproute2 interface would be:

    bridge mst set dev <PORT> msti <MSTI> state <STATE>

Current states in all applicable MSTIs can also be dumped via a
corresponding RTM_GETLINK. The proposed iproute interface looks like
this:

$ bridge mst
port              msti
vb1               0
		    state forwarding
		  100
		    state disabled
vb2               0
		    state forwarding
		  100
		    state forwarding

The preexisting per-VLAN states are still valid in the MST
mode (although they are read-only), and can be queried as usual if one
is interested in knowing a particular VLAN's state without having to
care about the VID to MSTI mapping (in this example VLAN 20 and 30 are
bound to MSTI 100):

$ bridge -d vlan
port              vlan-id
vb1               10
		    state forwarding mcast_router 1
		  20
		    state disabled mcast_router 1
		  30
		    state disabled mcast_router 1
		  40
		    state forwarding mcast_router 1
vb2               10
		    state forwarding mcast_router 1
		  20
		    state forwarding mcast_router 1
		  30
		    state forwarding mcast_router 1
		  40
		    state forwarding mcast_router 1

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h | 16 ++++++++++++++++
 include/uapi/linux/rtnetlink.h |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index f60244b747ae..a86a7e7b811f 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -122,6 +122,7 @@ enum {
 	IFLA_BRIDGE_VLAN_TUNNEL_INFO,
 	IFLA_BRIDGE_MRP,
 	IFLA_BRIDGE_CFM,
+	IFLA_BRIDGE_MST,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
@@ -453,6 +454,21 @@ enum {
 
 #define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1)
 
+enum {
+	IFLA_BRIDGE_MST_UNSPEC,
+	IFLA_BRIDGE_MST_ENTRY,
+	__IFLA_BRIDGE_MST_MAX,
+};
+#define IFLA_BRIDGE_MST_MAX (__IFLA_BRIDGE_MST_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MST_ENTRY_UNSPEC,
+	IFLA_BRIDGE_MST_ENTRY_MSTI,
+	IFLA_BRIDGE_MST_ENTRY_STATE,
+	__IFLA_BRIDGE_MST_ENTRY_MAX,
+};
+#define IFLA_BRIDGE_MST_ENTRY_MAX (__IFLA_BRIDGE_MST_ENTRY_MAX - 1)
+
 struct bridge_stp_xstats {
 	__u64 transition_blk;
 	__u64 transition_fwd;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 51530aade46e..83849a37db5b 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -817,6 +817,7 @@ enum {
 #define RTEXT_FILTER_MRP	(1 << 4)
 #define RTEXT_FILTER_CFM_CONFIG	(1 << 5)
 #define RTEXT_FILTER_CFM_STATUS	(1 << 6)
+#define RTEXT_FILTER_MST	(1 << 7)
 
 /* End of information exported to user level */
 
-- 
cgit v1.2.3


From 0dcac272540613d41c05e89679e4ddb978b612f1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Mar 2022 13:24:09 +0100
Subject: bpf: Add multi kprobe link

Adding new link type BPF_LINK_TYPE_KPROBE_MULTI that attaches kprobe
program through fprobe API.

The fprobe API allows to attach probe on multiple functions at once
very fast, because it works on top of ftrace. On the other hand this
limits the probe point to the function entry or return.

The kprobe program gets the same pt_regs input ctx as when it's attached
through the perf API.

Adding new attach type BPF_TRACE_KPROBE_MULTI that allows attachment
kprobe to multiple function with new link.

User provides array of addresses or symbols with count to attach the
kprobe program to. The new link_create uapi interface looks like:

  struct {
          __u32           flags;
          __u32           cnt;
          __aligned_u64   syms;
          __aligned_u64   addrs;
  } kprobe_multi;

The flags field allows single BPF_TRACE_KPROBE_MULTI bit to create
return multi kprobe.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220316122419.933957-4-jolsa@kernel.org
---
 include/uapi/linux/bpf.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 99fab54ae9c0..d77f47af7752 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -997,6 +997,7 @@ enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT,
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
+	BPF_TRACE_KPROBE_MULTI,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1011,6 +1012,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_NETNS = 5,
 	BPF_LINK_TYPE_XDP = 6,
 	BPF_LINK_TYPE_PERF_EVENT = 7,
+	BPF_LINK_TYPE_KPROBE_MULTI = 8,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -1118,6 +1120,11 @@ enum bpf_link_type {
  */
 #define BPF_F_XDP_HAS_FRAGS	(1U << 5)
 
+/* link_create.kprobe_multi.flags used in LINK_CREATE command for
+ * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
+ */
+#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1475,6 +1482,12 @@ union bpf_attr {
 				 */
 				__u64		bpf_cookie;
 			} perf_event;
+			struct {
+				__u32		flags;
+				__u32		cnt;
+				__aligned_u64	syms;
+				__aligned_u64	addrs;
+			} kprobe_multi;
 		};
 	} link_create;
 
-- 
cgit v1.2.3


From ca74823c6e16dd42b7cf60d9fdde80e2a81a67bb Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Mar 2022 13:24:12 +0100
Subject: bpf: Add cookie support to programs attached with kprobe multi link

Adding support to call bpf_get_attach_cookie helper from
kprobe programs attached with kprobe multi link.

The cookie is provided by array of u64 values, where each
value is paired with provided function address or symbol
with the same array index.

When cookie array is provided it's sorted together with
addresses (check bpf_kprobe_multi_cookie_swap). This way
we can find cookie based on the address in
bpf_get_attach_cookie helper.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220316122419.933957-7-jolsa@kernel.org
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d77f47af7752..7604e7d5438f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1487,6 +1487,7 @@ union bpf_attr {
 				__u32		cnt;
 				__aligned_u64	syms;
 				__aligned_u64	addrs;
+				__aligned_u64	cookies;
 			} kprobe_multi;
 		};
 	} link_create;
-- 
cgit v1.2.3


From 54f586a9153201c6cff55e1f561990c78bd99aa7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 16 Mar 2022 21:27:51 +0100
Subject: rfkill: make new event layout opt-in

Again new complaints surfaced that we had broken the ABI here,
although previously all the userspace tools had agreed that it
was their mistake and fixed it. Yet now there are cases (e.g.
RHEL) that want to run old userspace with newer kernels, and
thus are broken.

Since this is a bit of a whack-a-mole thing, change the whole
extensibility scheme of rfkill to no longer just rely on the
message lengths, but instead require userspace to opt in via a
new ioctl to a given maximum event size that it is willing to
understand.

By default, set that to RFKILL_EVENT_SIZE_V1 (8), so that the
behaviour for userspace not calling the ioctl will look as if
it's just running on an older kernel.

Fixes: 14486c82612a ("rfkill: add a reason to the HW rfkill state")
Cc: stable@vger.kernel.org # 5.11+
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20220316212749.16491491b270.Ifcb1950998330a596f29a2a162e00b7546a1d6d0@changeid
---
 include/uapi/linux/rfkill.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
index 9b77cfc42efa..283c5a7b3f2c 100644
--- a/include/uapi/linux/rfkill.h
+++ b/include/uapi/linux/rfkill.h
@@ -159,8 +159,16 @@ struct rfkill_event_ext {
  * old behaviour for all userspace, unless it explicitly opts in to the
  * rules outlined here by using the new &struct rfkill_event_ext.
  *
- * Userspace using &struct rfkill_event_ext must adhere to the following
- * rules
+ * Additionally, some other userspace (bluez, g-s-d) was reading with a
+ * large size but as streaming reads rather than message-based, or with
+ * too strict checks for the returned size. So eventually, we completely
+ * reverted this, and extended messages need to be opted in to by using
+ * an ioctl:
+ *
+ *  ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext));
+ *
+ * Userspace using &struct rfkill_event_ext and the ioctl must adhere to
+ * the following rules:
  *
  * 1. accept short writes, optionally using them to detect that it's
  *    running on an older kernel;
@@ -175,6 +183,8 @@ struct rfkill_event_ext {
 #define RFKILL_IOC_MAGIC	'R'
 #define RFKILL_IOC_NOINPUT	1
 #define RFKILL_IOCTL_NOINPUT	_IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
+#define RFKILL_IOC_MAX_SIZE	2
+#define RFKILL_IOCTL_MAX_SIZE	_IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32)
 
 /* and that's all userspace gets */
 
-- 
cgit v1.2.3


From 73799a889262b4675799bec20a2765be6d6a3f98 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 15 Mar 2022 15:38:54 -0400
Subject: counter: add new COUNTER_EVENT_CHANGE_OF_STATE

Add new counter event to notify user space about every new counter
pulse.

Link: https://lore.kernel.org/r/20220203135727.2374052-2-o.rempel@pengutronix.de
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/486a5de67414470449efb84d06a2f2214f4bb31d.1647373009.git.vilhelm.gray@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/counter.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h
index d0aa95aeff7b..96c5ffd368ad 100644
--- a/include/uapi/linux/counter.h
+++ b/include/uapi/linux/counter.h
@@ -61,6 +61,8 @@ enum counter_event_type {
 	COUNTER_EVENT_THRESHOLD,
 	/* Index signal detected */
 	COUNTER_EVENT_INDEX,
+	/* State of counter is changed */
+	COUNTER_EVENT_CHANGE_OF_STATE,
 };
 
 /**
-- 
cgit v1.2.3


From 336d4b814bf078fa698488632c19beca47308896 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 27 Jan 2022 12:15:32 -0600
Subject: ptrace: Move setting/clearing ptrace_message into ptrace_stop

Today ptrace_message is easy to overlook as it not a core part of
ptrace_stop.  It has been overlooked so much that there are places
that set ptrace_message and don't clear it, and places that never set
it.  So if you get an unlucky sequence of events the ptracer may be
able to read a ptrace_message that does not apply to the current
ptrace stop.

Move setting of ptrace_message into ptrace_stop so that it always gets
set before the stop, and always gets cleared after the stop.  This
prevents non-sense from being reported to userspace and makes
ptrace_message more visible in the ptrace helper functions so that
kernel developers can see it.

Link: https://lkml.kernel.org/r/87bky67qfv.fsf_-_@email.froward.int.ebiederm.org
Acked-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/uapi/linux/ptrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index b7af92e07d1f..195ae64a8c87 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -114,7 +114,7 @@ struct ptrace_rseq_configuration {
 
 /*
  * These values are stored in task->ptrace_message
- * by ptrace_report_syscall_* to describe the current syscall-stop.
+ * by ptrace_stop to describe the current syscall-stop.
  */
 #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
 #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
-- 
cgit v1.2.3


From ee2a098851bfbe8bcdd964c0121f4246f00ff41e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Mar 2022 11:20:41 -0700
Subject: bpf: Adjust BPF stack helper functions to accommodate skip > 0

Let's say that the caller has storage for num_elem stack frames.  Then,
the BPF stack helper functions walk the stack for only num_elem frames.
This means that if skip > 0, one keeps only 'num_elem - skip' frames.

This is because it sets init_nr in the perf_callchain_entry to the end
of the buffer to save num_elem entries only.  I believe it was because
the perf callchain code unwound the stack frames until it reached the
global max size (sysctl_perf_event_max_stack).

However it now has perf_callchain_entry_ctx.max_stack to limit the
iteration locally.  This simplifies the code to handle init_nr in the
BPF callstack entries and removes the confusion with the perf_event's
__PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.

Also change the comment on bpf_get_stack() in the header file to be
more explicit what the return value means.

Fixes: c195651e565a ("bpf: add bpf_get_stack helper")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com
Link: https://lore.kernel.org/bpf/20220314182042.71025-1-namhyung@kernel.org

Based-on-patch-by: Eugene Loh <eugene.loh@oracle.com>
---
 include/uapi/linux/bpf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7604e7d5438f..d14b10b85e51 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3009,8 +3009,8 @@ union bpf_attr {
  *
  * 			# sysctl kernel.perf_event_max_stack=<new value>
  * 	Return
- * 		A non-negative value equal to or less than *size* on success,
- * 		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
  *
  * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  * 	Description
@@ -4316,8 +4316,8 @@ union bpf_attr {
  *
  *			# sysctl kernel.perf_event_max_stack=<new value>
  *	Return
- *		A non-negative value equal to or less than *size* on success,
- *		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
  *
  * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
  *	Description
-- 
cgit v1.2.3


From 6d8491910fcd3324d0f0ece3bd68e85ead3a04d7 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Tue, 1 Mar 2022 06:03:47 +0000
Subject: KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2

KVM_CAP_DISABLE_QUIRKS is irrevocably broken. The capability does not
advertise the set of quirks which may be disabled to userspace, so it is
impossible to predict the behavior of KVM. Worse yet,
KVM_CAP_DISABLE_QUIRKS will tolerate any value for cap->args[0], meaning
it fails to reject attempts to set invalid quirk bits.

The only valid workaround for the quirky quirks API is to add a new CAP.
Actually advertise the set of quirks that can be disabled to userspace
so it can predict KVM's behavior. Reject values for cap->args[0] that
contain invalid bits.

Finally, add documentation for the new capability and describe the
existing quirks.

Signed-off-by: Oliver Upton <oupton@google.com>
Message-Id: <20220301060351.442881-5-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d2f1efc3aa35..91a6fe4e02c0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1143,6 +1143,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_AIL_MODE_3 210
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
+#define KVM_CAP_DISABLE_QUIRKS2 213
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 824ddc601adc2cc48efb7f58b57997986c1c1276 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Tue, 22 Mar 2022 14:45:32 -0700
Subject: userfaultfd: provide unmasked address on page-fault

Userfaultfd is supposed to provide the full address (i.e., unmasked) of
the faulting access back to userspace.  However, that is not the case for
quite some time.

Even running "userfaultfd_demo" from the userfaultfd man page provides the
wrong output (and contradicts the man page).  Notice that
"UFFD_EVENT_PAGEFAULT event" shows the masked address (7fc5e30b3000) and
not the first read address (0x7fc5e30b300f).

	Address returned by mmap() = 0x7fc5e30b3000

	fault_handler_thread():
	    poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
	    UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fc5e30b3000
		(uffdio_copy.copy returned 4096)
	Read address 0x7fc5e30b300f in main(): A
	Read address 0x7fc5e30b340f in main(): A
	Read address 0x7fc5e30b380f in main(): A
	Read address 0x7fc5e30b3c0f in main(): A

The exact address is useful for various reasons and specifically for
prefetching decisions.  If it is known that the memory is populated by
certain objects whose size is not page-aligned, then based on the faulting
address, the uffd-monitor can decide whether to prefetch and prefault the
adjacent page.

This bug has been for quite some time in the kernel: since commit
1a29d85eb0f1 ("mm: use vmf->address instead of of vmf->virtual_address")
vmf->virtual_address"), which dates back to 2016.  A concern has been
raised that existing userspace application might rely on the old/wrong
behavior in which the address is masked.  Therefore, it was suggested to
provide the masked address unless the user explicitly asks for the exact
address.

Add a new userfaultfd feature UFFD_FEATURE_EXACT_ADDRESS to direct
userfaultfd to provide the exact address.  Add a new "real_address" field
to vmf to hold the unmasked address.  Provide the address to userspace
accordingly.

Initialize real_address in various code-paths to be consistent with
address, even when it is not used, to be on the safe side.

[namit@vmware.com: initialize real_address on all code paths, per Jan]
  Link: https://lkml.kernel.org/r/20220226022655.350562-1-namit@vmware.com
[akpm@linux-foundation.org: fix typo in comment, per Jan]

Link: https://lkml.kernel.org/r/20220218041003.3508-1-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/userfaultfd.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 05b31d60acf6..ef739054cb1c 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -32,7 +32,8 @@
 			   UFFD_FEATURE_SIGBUS |		\
 			   UFFD_FEATURE_THREAD_ID |		\
 			   UFFD_FEATURE_MINOR_HUGETLBFS |	\
-			   UFFD_FEATURE_MINOR_SHMEM)
+			   UFFD_FEATURE_MINOR_SHMEM |		\
+			   UFFD_FEATURE_EXACT_ADDRESS)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -189,6 +190,10 @@ struct uffdio_api {
 	 *
 	 * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
 	 * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
+	 *
+	 * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
+	 * faults would be provided and the offset within the page would not be
+	 * masked.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -201,6 +206,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_THREAD_ID			(1<<8)
 #define UFFD_FEATURE_MINOR_HUGETLBFS		(1<<9)
 #define UFFD_FEATURE_MINOR_SHMEM		(1<<10)
+#define UFFD_FEATURE_EXACT_ADDRESS		(1<<11)
 	__u64 features;
 
 	__u64 ioctls;
-- 
cgit v1.2.3


From e99653afeb9585350644c5ae4b0ca987cbe8d053 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 9 Mar 2022 17:22:53 +0100
Subject: rtc: add new RTC_FEATURE_ALARM_WAKEUP_ONLY feature

Some RTCs have an IRQ pin that is not connected to a CPU interrupt but
rather directly to a PMIC or power supply. In that case, it is still useful
to be able to set alarms but we shouldn't expect interrupts.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20220309162301.61679-22-alexandre.belloni@bootlin.com
---
 include/uapi/linux/rtc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index 03e5b776e597..97aca4503a6a 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -133,7 +133,8 @@ struct rtc_param {
 #define RTC_FEATURE_UPDATE_INTERRUPT	4
 #define RTC_FEATURE_CORRECTION		5
 #define RTC_FEATURE_BACKUP_SWITCH_MODE	6
-#define RTC_FEATURE_CNT			7
+#define RTC_FEATURE_ALARM_WAKEUP_ONLY	7
+#define RTC_FEATURE_CNT			8
 
 /* parameter list */
 #define RTC_PARAM_FEATURES		0
-- 
cgit v1.2.3


From c724c866bb70cb8c607081a26823a1f0ebde4387 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 23 Mar 2022 16:05:29 -0700
Subject: linux/types.h: remove unnecessary __bitwise__

There are no users of "__bitwise__" except the definition of
"__bitwise".  Remove __bitwise__ and define __bitwise directly.

This is a follow-up to 05de97003c77 ("linux/types.h: enable endian
checks for all sparse builds").

[akpm@linux-foundation.org: change the tools/include/linux/types.h definition also]

Link: https://lkml.kernel.org/r/20220310220927.245704-2-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/types.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index f6d2f83cbe29..71696f424ac8 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -20,11 +20,10 @@
  */
 
 #ifdef __CHECKER__
-#define __bitwise__ __attribute__((bitwise))
+#define __bitwise	__attribute__((bitwise))
 #else
-#define __bitwise__
+#define __bitwise
 #endif
-#define __bitwise __bitwise__
 
 typedef __u16 __bitwise __le16;
 typedef __u16 __bitwise __be16;
-- 
cgit v1.2.3


From 179fd6ba3bacbf7b19cbdf9b14be109d54318394 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 23 Mar 2022 16:05:32 -0700
Subject: Documentation/sparse: add hints about __CHECKER__

Several attributes depend on __CHECKER__, but previously there was no
clue in the tree about when __CHECKER__ might be defined.  Add hints at
the most common places (__kernel, __user, __iomem, __bitwise) and in the
sparse documentation.

Link: https://lkml.kernel.org/r/20220310220927.245704-3-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: "Michael S . Tsirkin" <mst@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index 71696f424ac8..c4dc597f3dcf 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -19,6 +19,7 @@
  * any application/library that wants linux/types.h.
  */
 
+/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */
 #ifdef __CHECKER__
 #define __bitwise	__attribute__((bitwise))
 #else
-- 
cgit v1.2.3


From 7ef66d186eb95f987a97fb3329b65c840e2dc9bf Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 24 Mar 2022 06:53:18 -0600
Subject: io_uring: remove IORING_CQE_F_MSG

This was introduced with the message ring opcode, but isn't strictly
required for the request itself. The sender can encode what is needed
in user_data, which is passed to the receiver. It's unclear if having
a separate flag that essentially says "This CQE did not originate from
an SQE on this ring" provides any real utility to applications. While
we can always re-introduce a flag to provide this information, we cannot
take it away at a later point in time.

Remove the flag while we still can, before it's in a released kernel.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d2be4eb22008..784adc6f6ed2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -201,11 +201,9 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
- * IORING_CQE_F_MSG	If set, CQE was generated with IORING_OP_MSG_RING
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
 #define IORING_CQE_F_MORE		(1U << 1)
-#define IORING_CQE_F_MSG		(1U << 2)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,
-- 
cgit v1.2.3


From 90a6951b58e935124eeb7ecd9fbc2426f841ac0c Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Tue, 15 Feb 2022 11:04:29 +0530
Subject: Add definition of VIRTIO_F_IN_ORDER feature bit

This patch adds the definition of VIRTIO_F_IN_ORDER feature bit
in the relevant header file to make it available in QEMU's
linux standard header file virtio_config.h, which is updated using
scripts/update-linux-headers.sh

Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Link: https://lore.kernel.org/r/20220215053430.24650-1-gdawar@xilinx.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 include/uapi/linux/virtio_config.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index b5eda06f0d57..f0fb0ae021c0 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -82,6 +82,12 @@
 /* This feature indicates support for the packed virtqueue layout. */
 #define VIRTIO_F_RING_PACKED		34
 
+/*
+ * Inorder feature indicates that all buffers are used by the device
+ * in the same order in which they have been made available.
+ */
+#define VIRTIO_F_IN_ORDER		35
+
 /*
  * This feature indicates that memory accesses by the driver and the
  * device are ordered in a way described by the platform.
-- 
cgit v1.2.3


From 13d640a3e9a3ac7ec694843d3d3b785e85fb8cb8 Mon Sep 17 00:00:00 2001
From: zhenwei pi <pizhenwei@bytedance.com>
Date: Wed, 2 Mar 2022 11:39:14 +0800
Subject: virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC

Base on the lastest virtio crypto spec, define VIRTIO_CRYPTO_NOSPC.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Link: https://lore.kernel.org/r/20220302033917.1295334-2-pizhenwei@bytedance.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_crypto.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h
index a03932f10565..1166a49084b0 100644
--- a/include/uapi/linux/virtio_crypto.h
+++ b/include/uapi/linux/virtio_crypto.h
@@ -408,6 +408,7 @@ struct virtio_crypto_op_data_req {
 #define VIRTIO_CRYPTO_BADMSG    2
 #define VIRTIO_CRYPTO_NOTSUPP   3
 #define VIRTIO_CRYPTO_INVSESS   4 /* Invalid session id */
+#define VIRTIO_CRYPTO_NOSPC     5 /* no free session ID */
 
 /* The accelerator hardware is ready */
 #define VIRTIO_CRYPTO_S_HW_READY  (1 << 0)
-- 
cgit v1.2.3


From 24e19590628b58578748eeaec8140bf9c9dc00d9 Mon Sep 17 00:00:00 2001
From: zhenwei pi <pizhenwei@bytedance.com>
Date: Wed, 2 Mar 2022 11:39:15 +0800
Subject: virtio-crypto: introduce akcipher service

Introduce asymmetric service definition, asymmetric operations and
several well known algorithms.

Co-developed-by: lei he <helei.sig11@bytedance.com>
Signed-off-by: lei he <helei.sig11@bytedance.com>
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Link: https://lore.kernel.org/r/20220302033917.1295334-3-pizhenwei@bytedance.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
---
 include/uapi/linux/virtio_crypto.h | 81 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h
index 1166a49084b0..71a54a6849ca 100644
--- a/include/uapi/linux/virtio_crypto.h
+++ b/include/uapi/linux/virtio_crypto.h
@@ -37,6 +37,7 @@
 #define VIRTIO_CRYPTO_SERVICE_HASH   1
 #define VIRTIO_CRYPTO_SERVICE_MAC    2
 #define VIRTIO_CRYPTO_SERVICE_AEAD   3
+#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4
 
 #define VIRTIO_CRYPTO_OPCODE(service, op)   (((service) << 8) | (op))
 
@@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header {
 	   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02)
 #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \
 	   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03)
+#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \
+	   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04)
+#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \
+	   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05)
 	__le32 opcode;
 	__le32 algo;
 	__le32 flag;
@@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req {
 	__u8 padding[32];
 };
 
+struct virtio_crypto_rsa_session_para {
+#define VIRTIO_CRYPTO_RSA_RAW_PADDING   0
+#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1
+	__le32 padding_algo;
+
+#define VIRTIO_CRYPTO_RSA_NO_HASH   0
+#define VIRTIO_CRYPTO_RSA_MD2       1
+#define VIRTIO_CRYPTO_RSA_MD3       2
+#define VIRTIO_CRYPTO_RSA_MD4       3
+#define VIRTIO_CRYPTO_RSA_MD5       4
+#define VIRTIO_CRYPTO_RSA_SHA1      5
+#define VIRTIO_CRYPTO_RSA_SHA256    6
+#define VIRTIO_CRYPTO_RSA_SHA384    7
+#define VIRTIO_CRYPTO_RSA_SHA512    8
+#define VIRTIO_CRYPTO_RSA_SHA224    9
+	__le32 hash_algo;
+};
+
+struct virtio_crypto_ecdsa_session_para {
+#define VIRTIO_CRYPTO_CURVE_UNKNOWN   0
+#define VIRTIO_CRYPTO_CURVE_NIST_P192 1
+#define VIRTIO_CRYPTO_CURVE_NIST_P224 2
+#define VIRTIO_CRYPTO_CURVE_NIST_P256 3
+#define VIRTIO_CRYPTO_CURVE_NIST_P384 4
+#define VIRTIO_CRYPTO_CURVE_NIST_P521 5
+	__le32 curve_id;
+	__le32 padding;
+};
+
+struct virtio_crypto_akcipher_session_para {
+#define VIRTIO_CRYPTO_NO_AKCIPHER    0
+#define VIRTIO_CRYPTO_AKCIPHER_RSA   1
+#define VIRTIO_CRYPTO_AKCIPHER_DSA   2
+#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3
+	__le32 algo;
+
+#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC  1
+#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2
+	__le32 keytype;
+	__le32 keylen;
+
+	union {
+		struct virtio_crypto_rsa_session_para rsa;
+		struct virtio_crypto_ecdsa_session_para ecdsa;
+	} u;
+};
+
+struct virtio_crypto_akcipher_create_session_req {
+	struct virtio_crypto_akcipher_session_para para;
+	__u8 padding[36];
+};
+
 struct virtio_crypto_alg_chain_session_para {
 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER  1
 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH  2
@@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req {
 			mac_create_session;
 		struct virtio_crypto_aead_create_session_req
 			aead_create_session;
+		struct virtio_crypto_akcipher_create_session_req
+			akcipher_create_session;
 		struct virtio_crypto_destroy_session_req
 			destroy_session;
 		__u8 padding[56];
@@ -266,6 +325,14 @@ struct virtio_crypto_op_header {
 	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00)
 #define VIRTIO_CRYPTO_AEAD_DECRYPT \
 	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01)
+#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \
+	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00)
+#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \
+	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01)
+#define VIRTIO_CRYPTO_AKCIPHER_SIGN \
+	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02)
+#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \
+	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03)
 	__le32 opcode;
 	/* algo should be service-specific algorithms */
 	__le32 algo;
@@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req {
 	__u8 padding[32];
 };
 
+struct virtio_crypto_akcipher_para {
+	__le32 src_data_len;
+	__le32 dst_data_len;
+};
+
+struct virtio_crypto_akcipher_data_req {
+	struct virtio_crypto_akcipher_para para;
+	__u8 padding[40];
+};
+
 /* The request of the data virtqueue's packet */
 struct virtio_crypto_op_data_req {
 	struct virtio_crypto_op_header header;
@@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req {
 		struct virtio_crypto_hash_data_req hash_req;
 		struct virtio_crypto_mac_data_req mac_req;
 		struct virtio_crypto_aead_data_req aead_req;
+		struct virtio_crypto_akcipher_data_req akcipher_req;
 		__u8 padding[48];
 	} u;
 };
@@ -409,6 +487,7 @@ struct virtio_crypto_op_data_req {
 #define VIRTIO_CRYPTO_NOTSUPP   3
 #define VIRTIO_CRYPTO_INVSESS   4 /* Invalid session id */
 #define VIRTIO_CRYPTO_NOSPC     5 /* no free session ID */
+#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */
 
 /* The accelerator hardware is ready */
 #define VIRTIO_CRYPTO_S_HW_READY  (1 << 0)
@@ -439,7 +518,7 @@ struct virtio_crypto_config {
 	__le32 max_cipher_key_len;
 	/* Maximum length of authenticated key */
 	__le32 max_auth_key_len;
-	__le32 reserve;
+	__le32 akcipher_algo;
 	/* Maximum size of each crypto request's content */
 	__le64 max_size;
 };
-- 
cgit v1.2.3


From a61280ddddaa45f95b60dd54c05f8e0e5b6810b7 Mon Sep 17 00:00:00 2001
From: Longpeng <longpeng2@huawei.com>
Date: Tue, 15 Mar 2022 11:25:51 +0800
Subject: vdpa: support exposing the config size to userspace

- GET_CONFIG_SIZE: return the size of the virtio config space.

The size contains the fields which are conditional on feature
bits.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Longpeng <longpeng2@huawei.com>
Link: https://lore.kernel.org/r/20220315032553.455-2-longpeng2@huawei.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
---
 include/uapi/linux/vhost.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index c998860d7bbc..bc74e95a273a 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -150,4 +150,8 @@
 /* Get the valid iova range */
 #define VHOST_VDPA_GET_IOVA_RANGE	_IOR(VHOST_VIRTIO, 0x78, \
 					     struct vhost_vdpa_iova_range)
+
+/* Get the config size */
+#define VHOST_VDPA_GET_CONFIG_SIZE	_IOR(VHOST_VIRTIO, 0x79, __u32)
+
 #endif
-- 
cgit v1.2.3


From b04d910af330b55e1d5d6eb9ecd53a375a9cf81c Mon Sep 17 00:00:00 2001
From: Longpeng <longpeng2@huawei.com>
Date: Tue, 15 Mar 2022 11:25:53 +0800
Subject: vdpa: support exposing the count of vqs to userspace

- GET_VQS_COUNT: the count of virtqueues that exposed

Signed-off-by: Longpeng <longpeng2@huawei.com>
Link: https://lore.kernel.org/r/20220315032553.455-4-longpeng2@huawei.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Longpeng &lt;<a href="mailto:longpeng2@huawei.com" target="_blank">longpeng2@huawei.com</a>&gt;<br>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
---
 include/uapi/linux/vhost.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bc74e95a273a..5d99e7c242a2 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -154,4 +154,7 @@
 /* Get the config size */
 #define VHOST_VDPA_GET_CONFIG_SIZE	_IOR(VHOST_VIRTIO, 0x79, __u32)
 
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT	_IOR(VHOST_VIRTIO, 0x80, __u32)
+
 #endif
-- 
cgit v1.2.3


From f941c51eeac7ebe0f8ec30943bf78e7f60aad039 Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Tue, 29 Mar 2022 20:18:15 +0000
Subject: loop: fix ioctl calls using compat_loop_info

Support for cryptoloop was deleted in commit 47e9624616c8 ("block:
remove support for cryptoloop and the xor transfer"), making the usage
of loop_info->lo_encrypt_type obsolete. However, this member was also
removed from the compat_loop_info definition and this breaks userspace
ioctl calls for 32-bit binaries and CONFIG_COMPAT=y.

This patch restores the compat_loop_info->lo_encrypt_type member and
marks it obsolete as well as in the uapi header definitions.

Fixes: 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer")
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220329201815.1347500-1-cmllamas@google.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/loop.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 24a1c45bd1ae..98e60801195e 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -45,7 +45,7 @@ struct loop_info {
 	unsigned long	   lo_inode; 		/* ioctl r/o */
 	__kernel_old_dev_t lo_rdevice; 		/* ioctl r/o */
 	int		   lo_offset;
-	int		   lo_encrypt_type;
+	int		   lo_encrypt_type;		/* obsolete, ignored */
 	int		   lo_encrypt_key_size; 	/* ioctl w/o */
 	int		   lo_flags;
 	char		   lo_name[LO_NAME_SIZE];
@@ -61,7 +61,7 @@ struct loop_info64 {
 	__u64		   lo_offset;
 	__u64		   lo_sizelimit;/* bytes, 0 == max available */
 	__u32		   lo_number;			/* ioctl r/o */
-	__u32		   lo_encrypt_type;
+	__u32		   lo_encrypt_type;		/* obsolete, ignored */
 	__u32		   lo_encrypt_key_size;		/* ioctl w/o */
 	__u32		   lo_flags;
 	__u8		   lo_file_name[LO_NAME_SIZE];
-- 
cgit v1.2.3


From 55037ed7bdc62151a726f5685f88afa6a82959b1 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@linaro.org>
Date: Tue, 29 Mar 2022 10:12:52 -0700
Subject: uapi/linux/stddef.h: Add include guards

Add include guard wrapper define to uapi/linux/stddef.h to prevent macro
redefinition errors when stddef.h is included more than once. This was not
needed before since the only contents already used a redefinition test.

Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org
Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/stddef.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index 3021ea25a284..7837ba4fe728 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_STDDEF_H
+#define _UAPI_LINUX_STDDEF_H
+
 #include <linux/compiler_types.h>
 
 #ifndef __always_inline
@@ -41,3 +44,4 @@
 		struct { } __empty_ ## NAME; \
 		TYPE NAME[]; \
 	}
+#endif
-- 
cgit v1.2.3


From 35025735a79eaa894c43837b94fd33c9d6b122df Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:18 +0000
Subject: KVM: x86/xen: Support direct injection of event channel events

This adds a KVM_XEN_HVM_EVTCHN_SEND ioctl which allows direct injection
of events given an explicit { vcpu, port, priority } in precisely the
same form that those fields are given in the IRQ routing table.

Userspace is currently able to inject 2-level events purely by setting
the bits in the shared_info and vcpu_info, but FIFO event channels are
harder to deal with; we will need the kernel to take sole ownership of
delivery when we support those.

A patch advertising this feature with a new bit in the KVM_CAP_XEN_HVM
ioctl will be added in a subsequent patch.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-9-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 91a6fe4e02c0..49cd2e9e0f6a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1699,6 +1699,9 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
 #define KVM_XEN_VCPU_SET_ATTR	_IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
 
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_HVM_EVTCHN_SEND	_IOW(KVMIO,  0xd0, struct kvm_irq_routing_xen_evtchn)
+
 #define KVM_GET_SREGS2             _IOR(KVMIO,  0xcc, struct kvm_sregs2)
 #define KVM_SET_SREGS2             _IOW(KVMIO,  0xcd, struct kvm_sregs2)
 
-- 
cgit v1.2.3


From 2fd6df2f2b47d4301b1ee0fe9d627d1c061a5988 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Thu, 3 Mar 2022 15:41:19 +0000
Subject: KVM: x86/xen: intercept EVTCHNOP_send from guests

Userspace registers a sending @port to either deliver to an @eventfd
or directly back to a local event channel port.

After binding events the guest or host may wish to bind those
events to a particular vcpu. This is usually done for unbound
and and interdomain events. Update requests are handled via the
KVM_XEN_EVTCHN_UPDATE flag.

Unregistered ports are handled by the emulator.

Co-developed-by: Ankur Arora <ankur.a.arora@oracle.com>
Co-developed-By: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-10-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 49cd2e9e0f6a..623ed2cb228f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1686,6 +1686,32 @@ struct kvm_xen_hvm_attr {
 		struct {
 			__u64 gfn;
 		} shared_info;
+		struct {
+			__u32 send_port;
+			__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+			__u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN		(1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE		(1 << 1)
+#define KVM_XEN_EVTCHN_RESET		(1 << 2)
+			/*
+			 * Events sent by the guest are either looped back to
+			 * the guest itself (potentially on a different port#)
+			 * or signalled via an eventfd.
+			 */
+			union {
+				struct {
+					__u32 port;
+					__u32 vcpu;
+					__u32 priority;
+				} port;
+				struct {
+					__u32 port; /* Zero for eventfd */
+					__s32 fd;
+				} eventfd;
+				__u32 padding[4];
+			} deliver;
+		} evtchn;
+
 		__u64 pad[8];
 	} u;
 };
@@ -1694,6 +1720,8 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_LONG_MODE		0x0
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO		0x1
 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR		0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN		0x3
 
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
-- 
cgit v1.2.3


From 942c2490c23f2800ad8143f5eb84a79b859aa743 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:21 +0000
Subject: KVM: x86/xen: Add KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID

In order to intercept hypercalls such as VCPUOP_set_singleshot_timer, we
need to be aware of the Xen CPU numbering.

This looks a lot like the Hyper-V handling of vpidx, for obvious reasons.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-12-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 623ed2cb228f..4b65e9f0a4d9 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1747,6 +1747,7 @@ struct kvm_xen_vcpu_attr {
 			__u64 time_blocked;
 			__u64 time_offline;
 		} runstate;
+		__u32 vcpu_id;
 	} u;
 };
 
@@ -1757,6 +1758,8 @@ struct kvm_xen_vcpu_attr {
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT	0x3
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA	0x4
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST	0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID		0x6
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
-- 
cgit v1.2.3


From 536395260582be7443b0b35b0bbb89ffe3947f62 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Thu, 3 Mar 2022 15:41:22 +0000
Subject: KVM: x86/xen: handle PV timers oneshot mode

If the guest has offloaded the timer virq, handle the following
hypercalls for programming the timer:

    VCPUOP_set_singleshot_timer
    VCPUOP_stop_singleshot_timer
    set_timer_op(timestamp_ns)

The event channel corresponding to the timer virq is then used to inject
events once timer deadlines are met. For now we back the PV timer with
hrtimer.

[ dwmw2: Add save/restore, 32-bit compat mode, immediate delivery,
         don't check timer in kvm_vcpu_has_event() ]

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-13-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4b65e9f0a4d9..cb223e425223 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1748,6 +1748,11 @@ struct kvm_xen_vcpu_attr {
 			__u64 time_offline;
 		} runstate;
 		__u32 vcpu_id;
+		struct {
+			__u32 port;
+			__u32 priority;
+			__u64 expires_ns;
+		} timer;
 	} u;
 };
 
@@ -1760,6 +1765,7 @@ struct kvm_xen_vcpu_attr {
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST	0x5
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID		0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER		0x7
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
-- 
cgit v1.2.3


From 28d1629f751c4a5f9437fbaa0ee4ed81d1a8e587 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:23 +0000
Subject: KVM: x86/xen: Kernel acceleration for XENVER_version

Turns out this is a fast path for PV guests because they use it to
trigger the event channel upcall. So letting it bounce all the way up
to userspace is not great.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-14-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cb223e425223..4dda3896ed71 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1711,7 +1711,7 @@ struct kvm_xen_hvm_attr {
 				__u32 padding[4];
 			} deliver;
 		} evtchn;
-
+		__u32 xen_version;
 		__u64 pad[8];
 	} u;
 };
@@ -1722,6 +1722,7 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR		0x2
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
 #define KVM_XEN_ATTR_TYPE_EVTCHN		0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION		0x4
 
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
-- 
cgit v1.2.3


From fde0451be8fb3208d4d146b8602d99ee8139e515 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:24 +0000
Subject: KVM: x86/xen: Support per-vCPU event channel upcall via local APIC

Windows uses a per-vCPU vector, and it's delivered via the local APIC
basically like an MSI (with associated EOI) unlike the traditional
guest-wide vector which is just magically asserted by Xen (and in the
KVM case by kvm_xen_has_interrupt() / kvm_cpu_get_extint()).

Now that the kernel is able to raise event channel events for itself,
being able to do so for Windows guests is also going to be useful.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-15-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4dda3896ed71..a9ba690c4f37 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1754,6 +1754,7 @@ struct kvm_xen_vcpu_attr {
 			__u32 priority;
 			__u64 expires_ns;
 		} timer;
+		__u8 vector;
 	} u;
 };
 
@@ -1767,6 +1768,7 @@ struct kvm_xen_vcpu_attr {
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID		0x6
 #define KVM_XEN_VCPU_ATTR_TYPE_TIMER		0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR	0x8
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
-- 
cgit v1.2.3


From 661a20fab7d156cf6b9a407c946a1e558a633151 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:25 +0000
Subject: KVM: x86/xen: Advertise and document KVM_XEN_HVM_CONFIG_EVTCHN_SEND
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At the end of the patch series adding this batch of event channel
acceleration features, finally add the feature bit which advertises
them and document it all.

For SCHEDOP_poll we need to wake a polling vCPU when a given port
is triggered, even when it's masked — and we want to implement that
in the kernel, for efficiency. So we want the kernel to know that it
has sole ownership of event channel delivery. Thus, we allow
userspace to make the 'promise' by setting the corresponding feature
bit in its KVM_XEN_HVM_CONFIG call. As we implement SCHEDOP_poll
bypass later, we will do so only if that promise has been made by
userspace.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-16-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a9ba690c4f37..ee5cc9e2a837 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1232,6 +1232,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO		(1 << 2)
 #define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 3)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
 
 struct kvm_xen_hvm_config {
 	__u32 flags;
-- 
cgit v1.2.3


From ffbb61d09fc56c85e28b110494f3788d0ed4d1f8 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 25 Feb 2022 14:53:02 +0000
Subject: KVM: x86: Accept KVM_[GS]ET_TSC_KHZ as a VM ioctl.

This sets the default TSC frequency for subsequently created vCPUs.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20220225145304.36166-2-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ee5cc9e2a837..8616af85dc5d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1144,6 +1144,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
+#define KVM_CAP_VM_TSC_CONTROL 214
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1471,7 +1472,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
 /* Available with KVM_CAP_PPC_GET_PVINFO */
 #define KVM_PPC_GET_PVINFO	  _IOW(KVMIO,  0xa1, struct kvm_ppc_pvinfo)
-/* Available with KVM_CAP_TSC_CONTROL */
+/* Available with KVM_CAP_TSC_CONTROL for a vCPU, or with
+*  KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
 #define KVM_SET_TSC_KHZ           _IO(KVMIO,  0xa2)
 #define KVM_GET_TSC_KHZ           _IO(KVMIO,  0xa3)
 /* Available with KVM_CAP_PCI_2_3 */
-- 
cgit v1.2.3


From 1cd927ad6f62f27d8908498dcbf61395c5dd5fe2 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Fri, 1 Apr 2022 14:39:03 -0400
Subject: tracing: mark user_events as BROKEN

After being merged, user_events become more visible to a wider audience
that have concerns with the current API.

It is too late to fix this for this release, but instead of a full
revert, just mark it as BROKEN (which prevents it from being selected in
make config).  Then we can work finding a better API.  If that fails,
then it will need to be completely reverted.

To not have the code silently bitrot, still allow building it with
COMPILE_TEST.

And to prevent the uapi header from being installed, then later changed,
and then have an old distro user space see the old version, move the
header file out of the uapi directory.

Surround the include with CONFIG_COMPILE_TEST to the current location,
but when the BROKEN tag is taken off, it will use the uapi directory,
and fail to compile.  This is a good way to remind us to move the header
back.

Link: https://lore.kernel.org/all/20220330155835.5e1f6669@gandalf.local.home
Link: https://lkml.kernel.org/r/20220330201755.29319-1-mathieu.desnoyers@efficios.com
Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/user_events.h | 116 ---------------------------------------
 1 file changed, 116 deletions(-)
 delete mode 100644 include/uapi/linux/user_events.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h
deleted file mode 100644
index e570840571e1..000000000000
--- a/include/uapi/linux/user_events.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (c) 2021, Microsoft Corporation.
- *
- * Authors:
- *   Beau Belgrave <beaub@linux.microsoft.com>
- */
-#ifndef _UAPI_LINUX_USER_EVENTS_H
-#define _UAPI_LINUX_USER_EVENTS_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#ifdef __KERNEL__
-#include <linux/uio.h>
-#else
-#include <sys/uio.h>
-#endif
-
-#define USER_EVENTS_SYSTEM "user_events"
-#define USER_EVENTS_PREFIX "u:"
-
-/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
-#define EVENT_BIT_FTRACE 0
-#define EVENT_BIT_PERF 1
-#define EVENT_BIT_OTHER 7
-
-#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
-#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
-#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
-
-/* Create dynamic location entry within a 32-bit value */
-#define DYN_LOC(offset, size) ((size) << 16 | (offset))
-
-/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */
-#define FLAG_BPF_ITER (1 << 0)
-
-/*
- * Describes an event registration and stores the results of the registration.
- * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum
- * must set the size and name_args before invocation.
- */
-struct user_reg {
-
-	/* Input: Size of the user_reg structure being used */
-	__u32 size;
-
-	/* Input: Pointer to string with event name, description and flags */
-	__u64 name_args;
-
-	/* Output: Byte index of the event within the status page */
-	__u32 status_index;
-
-	/* Output: Index of the event to use when writing data */
-	__u32 write_index;
-};
-
-#define DIAG_IOC_MAGIC '*'
-
-/* Requests to register a user_event */
-#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*)
-
-/* Requests to delete a user_event */
-#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*)
-
-/* Data type that was passed to the BPF program */
-enum {
-	/* Data resides in kernel space */
-	USER_BPF_DATA_KERNEL,
-
-	/* Data resides in user space */
-	USER_BPF_DATA_USER,
-
-	/* Data is a pointer to a user_bpf_iter structure */
-	USER_BPF_DATA_ITER,
-};
-
-/*
- * Describes an iovec iterator that BPF programs can use to access data for
- * a given user_event write() / writev() call.
- */
-struct user_bpf_iter {
-
-	/* Offset of the data within the first iovec */
-	__u32 iov_offset;
-
-	/* Number of iovec structures */
-	__u32 nr_segs;
-
-	/* Pointer to iovec structures */
-	const struct iovec *iov;
-};
-
-/* Context that BPF programs receive when attached to a user_event */
-struct user_bpf_context {
-
-	/* Data type being passed (see union below) */
-	__u32 data_type;
-
-	/* Length of the data */
-	__u32 data_len;
-
-	/* Pointer to data, varies by data type */
-	union {
-		/* Kernel data (data_type == USER_BPF_DATA_KERNEL) */
-		void *kdata;
-
-		/* User data (data_type == USER_BPF_DATA_USER) */
-		void *udata;
-
-		/* Direct iovec (data_type == USER_BPF_DATA_ITER) */
-		struct user_bpf_iter *iter;
-	};
-};
-
-#endif /* _UAPI_LINUX_USER_EVENTS_H */
-- 
cgit v1.2.3


From 66df0fdb5981052f3ad97c9879eda93712bdefc2 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Sun, 3 Apr 2022 19:53:26 +0800
Subject: bpf: Correct the comment for BTF kind bitfield

The commit 8fd886911a6a ("bpf: Add BTF_KIND_FLOAT to uapi") has extended
the BTF kind bitfield from 4 to 5 bits, correct the comment.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220403115327.205964-1-haiyue.wang@intel.com
---
 include/uapi/linux/btf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index b0d8fea1951d..a9162a6c0284 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -33,8 +33,8 @@ struct btf_type {
 	/* "info" bits arrangement
 	 * bits  0-15: vlen (e.g. # of struct's members)
 	 * bits 16-23: unused
-	 * bits 24-27: kind (e.g. int, ptr, array...etc)
-	 * bits 28-30: unused
+	 * bits 24-28: kind (e.g. int, ptr, array...etc)
+	 * bits 29-30: unused
 	 * bit     31: kind_flag, currently used by
 	 *             struct, union and fwd
 	 */
-- 
cgit v1.2.3


From f56b919fa4f1b27c589e71f7d90e9785f9196bf1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 16 Feb 2022 09:39:22 +0100
Subject: linux/fb.h: Spelling s/palette/palette/

Fix a misspelling of "palette" in a comment.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/uapi/linux/fb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h
index 4c14e8be7267..3a49913d006c 100644
--- a/include/uapi/linux/fb.h
+++ b/include/uapi/linux/fb.h
@@ -182,7 +182,7 @@ struct fb_fix_screeninfo {
  *
  * For pseudocolor: offset and length should be the same for all color
  * components. Offset specifies the position of the least significant bit
- * of the pallette index in a pixel value. Length indicates the number
+ * of the palette index in a pixel value. Length indicates the number
  * of available palette entries (i.e. # of entries = 1 << length).
  */
 struct fb_bitfield {
-- 
cgit v1.2.3


From 1ee375d77bb944321c969b456aa73994566cecf6 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 4 Apr 2022 10:54:47 -0700
Subject: net, uapi: remove inclusion of arpa/inet.h

In include/uapi/linux/tipc_config.h, there's a comment that it includes
arpa/inet.h for ntohs; but ntohs is not defined in any UAPI header. For
now, reuse the definitions from include/linux/byteorder/generic.h, since
the various conversion functions do exist in UAPI headers:
include/uapi/linux/byteorder/big_endian.h
include/uapi/linux/byteorder/little_endian.h

We would like to get to the point where we can build UAPI header tests
with -nostdinc, meaning that kernel UAPI headers should not have a
circular dependency on libc headers.

Link: https://android-review.googlesource.com/c/platform/bionic/+/2048127
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_config.h | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 4dfc05651c98..c00adf2fe868 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -43,10 +43,6 @@
 #include <linux/tipc.h>
 #include <asm/byteorder.h>
 
-#ifndef __KERNEL__
-#include <arpa/inet.h> /* for ntohs etc. */
-#endif
-
 /*
  * Configuration
  *
@@ -269,33 +265,33 @@ static inline int TLV_OK(const void *tlv, __u16 space)
 	 */
 
 	return (space >= TLV_SPACE(0)) &&
-		(ntohs(((struct tlv_desc *)tlv)->tlv_len) <= space);
+		(__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space);
 }
 
 static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type)
 {
 	return TLV_OK(tlv, space) &&
-		(ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
+		(__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
 }
 
 static inline int TLV_GET_LEN(struct tlv_desc *tlv)
 {
-	return ntohs(tlv->tlv_len);
+	return __be16_to_cpu(tlv->tlv_len);
 }
 
 static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len)
 {
-	tlv->tlv_len = htons(len);
+	tlv->tlv_len = __cpu_to_be16(len);
 }
 
 static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv,  __u16 type)
 {
-	return (ntohs(tlv->tlv_type) == type);
+	return (__be16_to_cpu(tlv->tlv_type) == type);
 }
 
 static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type)
 {
-	tlv->tlv_type = htons(type);
+	tlv->tlv_type = __cpu_to_be16(type);
 }
 
 static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
@@ -305,8 +301,8 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
 
 	tlv_len = TLV_LENGTH(len);
 	tlv_ptr = (struct tlv_desc *)tlv;
-	tlv_ptr->tlv_type = htons(type);
-	tlv_ptr->tlv_len  = htons(tlv_len);
+	tlv_ptr->tlv_type = __cpu_to_be16(type);
+	tlv_ptr->tlv_len  = __cpu_to_be16(tlv_len);
 	if (len && data) {
 		memcpy(TLV_DATA(tlv_ptr), data, len);
 		memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len);
@@ -348,7 +344,7 @@ static inline void *TLV_LIST_DATA(struct tlv_list_desc *list)
 
 static inline void TLV_LIST_STEP(struct tlv_list_desc *list)
 {
-	__u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len));
+	__u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len));
 
 	list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space);
 	list->tlv_space -= tlv_space;
@@ -404,9 +400,9 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags,
 
 	msg_len = TCM_LENGTH(data_len);
 	tcm_hdr = (struct tipc_cfg_msg_hdr *)msg;
-	tcm_hdr->tcm_len   = htonl(msg_len);
-	tcm_hdr->tcm_type  = htons(cmd);
-	tcm_hdr->tcm_flags = htons(flags);
+	tcm_hdr->tcm_len   = __cpu_to_be32(msg_len);
+	tcm_hdr->tcm_type  = __cpu_to_be16(cmd);
+	tcm_hdr->tcm_flags = __cpu_to_be16(flags);
 	if (data_len && data) {
 		memcpy(TCM_DATA(msg), data, data_len);
 		memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len);
-- 
cgit v1.2.3


From fce96cf0443083e37455eff8f78fd240c621dae3 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Mon, 7 Mar 2022 15:33:53 -0600
Subject: virt: Add SEV-SNP guest driver

The SEV-SNP specification provides the guest a mechanism to communicate
with the PSP without risk from a malicious hypervisor who wishes to
read, alter, drop or replay the messages sent. The driver uses
snp_issue_guest_request() to issue GHCB SNP_GUEST_REQUEST or
SNP_EXT_GUEST_REQUEST NAE events to submit the request to PSP.

The PSP requires that all communication should be encrypted using key
specified through a struct snp_guest_platform_data descriptor.

Userspace can use SNP_GET_REPORT ioctl() to query the guest attestation
report.

See SEV-SNP spec section Guest Messages for more details.

  [ bp: Remove the "what" from the commit message, massage. ]

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220307213356.2797205-44-brijesh.singh@amd.com
---
 include/uapi/linux/sev-guest.h | 50 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 include/uapi/linux/sev-guest.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
new file mode 100644
index 000000000000..38f11d723c68
--- /dev/null
+++ b/include/uapi/linux/sev-guest.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Userspace interface for AMD SEV and SNP guest driver.
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * SEV API specification is available at: https://developer.amd.com/sev/
+ */
+
+#ifndef __UAPI_LINUX_SEV_GUEST_H_
+#define __UAPI_LINUX_SEV_GUEST_H_
+
+#include <linux/types.h>
+
+struct snp_report_req {
+	/* user data that should be included in the report */
+	__u8 user_data[64];
+
+	/* The vmpl level to be included in the report */
+	__u32 vmpl;
+
+	/* Must be zero filled */
+	__u8 rsvd[28];
+};
+
+struct snp_report_resp {
+	/* response data, see SEV-SNP spec for the format */
+	__u8 data[4000];
+};
+
+struct snp_guest_request_ioctl {
+	/* message version number (must be non-zero) */
+	__u8 msg_version;
+
+	/* Request and response structure address */
+	__u64 req_data;
+	__u64 resp_data;
+
+	/* firmware error code on failure (see psp-sev.h) */
+	__u64 fw_err;
+};
+
+#define SNP_GUEST_REQ_IOC_TYPE	'S'
+
+/* Get SNP attestation report */
+#define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl)
+
+#endif /* __UAPI_LINUX_SEV_GUEST_H_ */
-- 
cgit v1.2.3


From 68de0b2f938642079c0c853b219bdb88c4dc4d13 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 24 Feb 2022 10:56:23 -0600
Subject: virt: sevguest: Add support to derive key

The SNP_GET_DERIVED_KEY ioctl interface can be used by the SNP guest to
ask the firmware to provide a key derived from a root key. The derived
key may be used by the guest for any purposes it chooses, such as a
sealing key or communicating with the external entities.

See SEV-SNP firmware spec for more information.

  [ bp: No need to memset "req" - it will get overwritten. ]

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Link: https://lore.kernel.org/r/20220307213356.2797205-45-brijesh.singh@amd.com
---
 include/uapi/linux/sev-guest.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
index 38f11d723c68..598367f12064 100644
--- a/include/uapi/linux/sev-guest.h
+++ b/include/uapi/linux/sev-guest.h
@@ -30,6 +30,20 @@ struct snp_report_resp {
 	__u8 data[4000];
 };
 
+struct snp_derived_key_req {
+	__u32 root_key_select;
+	__u32 rsvd;
+	__u64 guest_field_select;
+	__u32 vmpl;
+	__u32 guest_svn;
+	__u64 tcb_version;
+};
+
+struct snp_derived_key_resp {
+	/* response data, see SEV-SNP spec for the format */
+	__u8 data[64];
+};
+
 struct snp_guest_request_ioctl {
 	/* message version number (must be non-zero) */
 	__u8 msg_version;
@@ -47,4 +61,7 @@ struct snp_guest_request_ioctl {
 /* Get SNP attestation report */
 #define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl)
 
+/* Get a derived key from the root */
+#define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl)
+
 #endif /* __UAPI_LINUX_SEV_GUEST_H_ */
-- 
cgit v1.2.3


From d80b494f712317493d464a55652698c4d1b7bb0f Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Mon, 7 Mar 2022 15:33:55 -0600
Subject: virt: sevguest: Add support to get extended report

Version 2 of GHCB specification defines Non-Automatic-Exit (NAE) to get
extended guest report which is similar to the SNP_GET_REPORT ioctl. The
main difference is related to the additional data that will be returned.

That additional data returned is a certificate blob that can be used by
the SNP guest user. The certificate blob layout is defined in the GHCB
specification. The driver simply treats the blob as a opaque data and
copies it to userspace.

  [ bp: Massage commit message, cast 1st arg of access_ok() ]

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220307213356.2797205-46-brijesh.singh@amd.com
---
 include/uapi/linux/sev-guest.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
index 598367f12064..256aaeff7e65 100644
--- a/include/uapi/linux/sev-guest.h
+++ b/include/uapi/linux/sev-guest.h
@@ -56,6 +56,16 @@ struct snp_guest_request_ioctl {
 	__u64 fw_err;
 };
 
+struct snp_ext_report_req {
+	struct snp_report_req data;
+
+	/* where to copy the certificate blob */
+	__u64 certs_address;
+
+	/* length of the certificate blob */
+	__u32 certs_len;
+};
+
 #define SNP_GUEST_REQ_IOC_TYPE	'S'
 
 /* Get SNP attestation report */
@@ -64,4 +74,7 @@ struct snp_guest_request_ioctl {
 /* Get a derived key from the root */
 #define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl)
 
+/* Get SNP extended report as defined in the GHCB specification version 2. */
+#define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl)
+
 #endif /* __UAPI_LINUX_SEV_GUEST_H_ */
-- 
cgit v1.2.3


From 794c24e9921f32ded4422833a990ccf11dc3c00e Mon Sep 17 00:00:00 2001
From: Jeffrey Ji <jeffreyji@google.com>
Date: Wed, 6 Apr 2022 17:26:00 +0000
Subject: net-core: rx_otherhost_dropped to core_stats

Increment rx_otherhost_dropped counter when packet dropped due to
mismatched dest MAC addr.

An example when this drop can occur is when manually crafting raw
packets that will be consumed by a user space application via a tap
device. For testing purposes local traffic was generated using trafgen
for the client and netcat to start a server

Tested: Created 2 netns, sent 1 packet using trafgen from 1 to the other
with "{eth(daddr=$INCORRECT_MAC...}", verified that iproute2 showed the
counter was incremented. (Also had to modify iproute2 to show the stat,
additional patch for that coming next.)

Signed-off-by: Jeffrey Ji <jeffreyji@google.com>
Reviewed-by: Brian Vazquez <brianvv@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20220406172600.1141083-1-jeffreyjilinux@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_link.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index cc284c048e69..d1e600816b82 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -211,6 +211,9 @@ struct rtnl_link_stats {
  * @rx_nohandler: Number of packets received on the interface
  *   but dropped by the networking stack because the device is
  *   not designated to receive packets (e.g. backup link in a bond).
+ *
+ * @rx_otherhost_dropped: Number of packets dropped due to mismatch
+ *   in destination MAC address.
  */
 struct rtnl_link_stats64 {
 	__u64	rx_packets;
@@ -243,6 +246,8 @@ struct rtnl_link_stats64 {
 	__u64	rx_compressed;
 	__u64	tx_compressed;
 	__u64	rx_nohandler;
+
+	__u64	rx_otherhost_dropped;
 };
 
 /* Subset of link stats useful for in-HW collection. Meaning of the fields is as
-- 
cgit v1.2.3


From c4212f3eb89fd5654f0a6ed2ee1d13fcb86cb664 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sun, 10 Apr 2022 15:13:24 -0600
Subject: io_uring: flag the fact that linked file assignment is sane

Give applications a way to tell if the kernel supports sane linked files,
as in files being assigned at the right time to be able to reliably
do <open file direct into slot X><read file from slot X> while using
IOSQE_IO_LINK to order them.

Not really a bug fix, but flag it as such so that it gets pulled in with
backports of the deferred file assignment.

Fixes: 6bf9c47a3989 ("io_uring: defer file assignment")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 784adc6f6ed2..1845cf7c80ba 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -296,6 +296,7 @@ struct io_uring_params {
 #define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
 #define IORING_FEAT_RSRC_TAGS		(1U << 10)
 #define IORING_FEAT_CQE_SKIP		(1U << 11)
+#define IORING_FEAT_LINKED_FILE		(1U << 12)
 
 /*
  * io_uring_register(2) opcodes and arguments
-- 
cgit v1.2.3


From 2d7991fe867974a8e5065ee9691451a406b9320d Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 11 Mar 2022 16:23:22 -0700
Subject: dmaengine: idxd: update IAA definitions for user header

Add additional structure definitions for Intel In-memory Analytics
Accelerator (IAA/IAX). See specification (1) for more details.

1: https://cdrdv2.intel.com/v1/dl/getContent/721858

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/164704100212.1373038.18362680016033557757.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/uapi/linux/idxd.h | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index a8f0ff75c430..bce7c43657d5 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -53,6 +53,11 @@ enum idxd_scmd_stat {
 
 /* IAX */
 #define IDXD_OP_FLAG_RD_SRC2_AECS	0x010000
+#define IDXD_OP_FLAG_RD_SRC2_2ND	0x020000
+#define IDXD_OP_FLAG_WR_SRC2_AECS_COMP	0x040000
+#define IDXD_OP_FLAG_WR_SRC2_AECS_OVFL	0x080000
+#define IDXD_OP_FLAG_SRC2_STS		0x100000
+#define IDXD_OP_FLAG_CRC_RFC3720	0x200000
 
 /* Opcode */
 enum dsa_opcode {
@@ -81,6 +86,18 @@ enum iax_opcode {
 	IAX_OPCODE_MEMMOVE,
 	IAX_OPCODE_DECOMPRESS = 0x42,
 	IAX_OPCODE_COMPRESS,
+	IAX_OPCODE_CRC64,
+	IAX_OPCODE_ZERO_DECOMP_32 = 0x48,
+	IAX_OPCODE_ZERO_DECOMP_16,
+	IAX_OPCODE_DECOMP_32 = 0x4c,
+	IAX_OPCODE_DECOMP_16,
+	IAX_OPCODE_SCAN = 0x50,
+	IAX_OPCODE_SET_MEMBER,
+	IAX_OPCODE_EXTRACT,
+	IAX_OPCODE_SELECT,
+	IAX_OPCODE_RLE_BURST,
+	IAX_OPCDE_FIND_UNIQUE,
+	IAX_OPCODE_EXPAND,
 };
 
 /* Completion record status */
@@ -120,6 +137,7 @@ enum iax_completion_status {
 	IAX_COMP_NONE = 0,
 	IAX_COMP_SUCCESS,
 	IAX_COMP_PAGE_FAULT_IR = 0x04,
+	IAX_COMP_ANALYTICS_ERROR = 0x0a,
 	IAX_COMP_OUTBUF_OVERFLOW,
 	IAX_COMP_BAD_OPCODE = 0x10,
 	IAX_COMP_INVALID_FLAGS,
@@ -140,7 +158,10 @@ enum iax_completion_status {
 	IAX_COMP_WATCHDOG,
 	IAX_COMP_INVALID_COMP_FLAG = 0x30,
 	IAX_COMP_INVALID_FILTER_FLAG,
-	IAX_COMP_INVALID_NUM_ELEMS = 0x33,
+	IAX_COMP_INVALID_INPUT_SIZE,
+	IAX_COMP_INVALID_NUM_ELEMS,
+	IAX_COMP_INVALID_SRC1_WIDTH,
+	IAX_COMP_INVALID_INVERT_OUT,
 };
 
 #define DSA_COMP_STATUS_MASK		0x7f
@@ -319,8 +340,12 @@ struct iax_completion_record {
 	uint32_t                output_size;
 	uint8_t                 output_bits;
 	uint8_t                 rsvd3;
-	uint16_t                rsvd4;
-	uint64_t                rsvd5[4];
+	uint16_t                xor_csum;
+	uint32_t                crc;
+	uint32_t                min;
+	uint32_t                max;
+	uint32_t                sum;
+	uint64_t                rsvd4[2];
 } __attribute__((packed));
 
 struct iax_raw_completion_record {
-- 
cgit v1.2.3


From 545528d788556c724eeb5400757f828ef27782a8 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Wed, 13 Apr 2022 13:51:54 +0300
Subject: net: netlink: add NLM_F_BULK delete request modifier

Add a new delete request modifier called NLM_F_BULK which, when
supported, would cause the request to delete multiple objects. The flag
is a convenient way to signal that a multiple delete operation is
requested which can be gradually added to different delete requests. In
order to make sure older kernels will error out if the operation is not
supported instead of doing something unintended we have to break a
required condition when implementing support for this flag, f.e. for
neighbors we will omit the mandatory mac address attribute.
Initially it will be used to add flush with filtering support for bridge
fdbs, but it also opens the door to add similar support to others.

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 4c0cde075c27..855dffb4c1c3 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -72,6 +72,7 @@ struct nlmsghdr {
 
 /* Modifiers to DELETE request */
 #define NLM_F_NONREC	0x100	/* Do not delete recursively	*/
+#define NLM_F_BULK	0x200	/* Delete multiple objects	*/
 
 /* Flags for ACK message */
 #define NLM_F_CAPPED	0x100	/* request was capped */
-- 
cgit v1.2.3


From ea2c0f9e3fc2f94f090d693b7235c02af1289629 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Wed, 13 Apr 2022 13:52:00 +0300
Subject: net: rtnetlink: add ndm flags and state mask attributes

Add ndm flags/state masks which will be used for bulk delete filtering.
All of these are used by the bridge and vxlan drivers. Also minimal attr
policy validation is added, it is up to ndo_fdb_del_bulk implementers to
further validate them.

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/neighbour.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index db05fb55055e..39c565e460c7 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -32,6 +32,8 @@ enum {
 	NDA_NH_ID,
 	NDA_FDB_EXT_ATTRS,
 	NDA_FLAGS_EXT,
+	NDA_NDM_STATE_MASK,
+	NDA_NDM_FLAGS_MASK,
 	__NDA_MAX
 };
 
-- 
cgit v1.2.3


From c24a950ec7d60c4da91dc3f273295c7f438b531e Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Thu, 7 Apr 2022 14:02:33 -0700
Subject: KVM, SEV: Add KVM_EXIT_SHUTDOWN metadata for SEV-ES

If an SEV-ES guest requests termination, exit to userspace with
KVM_EXIT_SYSTEM_EVENT and a dedicated SEV_TERM type instead of -EINVAL
so that userspace can take appropriate action.

See AMD's GHCB spec section '4.1.13 Termination Request' for more details.

Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Peter Gonda <pgonda@google.com>

Reported-by: kernel test robot <lkp@intel.com>
Message-Id: <20220407210233.782250-1-pgonda@google.com>
[Add documentatino. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8616af85dc5d..dd1d8167e71f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -444,8 +444,11 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_SHUTDOWN       1
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
+#define KVM_SYSTEM_EVENT_SEV_TERM       4
+#define KVM_SYSTEM_EVENT_NDATA_VALID    (1u << 31)
 			__u32 type;
-			__u64 flags;
+			__u32 ndata;
+			__u64 data[16];
 		} system_event;
 		/* KVM_EXIT_S390_STSI */
 		struct {
-- 
cgit v1.2.3


From 4dc84c06a343fcb95fd5a0acb537aefa4ebdd1b0 Mon Sep 17 00:00:00 2001
From: Jie Wang <wangjie125@huawei.com>
Date: Tue, 12 Apr 2022 10:01:19 +0800
Subject: net: ethtool: extend ringparam set/get APIs for tx_push

Currently tx push is a standard driver feature which controls use of a fast
path descriptor push. So this patch extends the ringparam APIs and data
structures to support set/get tx push by ethtool -G/g.

Signed-off-by: Jie Wang <wangjie125@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool_netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 979850221b8d..d2fb4f7be61b 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -338,6 +338,7 @@ enum {
 	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
 	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,			/* u8 */
 	ETHTOOL_A_RINGS_CQE_SIZE,			/* u32 */
+	ETHTOOL_A_RINGS_TX_PUSH,			/* u8 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_RINGS_CNT,
-- 
cgit v1.2.3


From f9a2fb73318eb4dbf8cd84866b8b0dd012d8b116 Mon Sep 17 00:00:00 2001
From: Arun Ajith S <aajith@arista.com>
Date: Fri, 15 Apr 2022 08:34:02 +0000
Subject: net/ipv6: Introduce accept_unsolicited_na knob to implement
 router-side changes for RFC9131

Add a new neighbour cache entry in STALE state for routers on receiving
an unsolicited (gratuitous) neighbour advertisement with
target link-layer-address option specified.
This is similar to the arp_accept configuration for IPv4.
A new sysctl endpoint is created to turn on this behaviour:
/proc/sys/net/ipv6/conf/interface/accept_unsolicited_na.

Signed-off-by: Arun Ajith S <aajith@arista.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index d4178dace0bf..549ddeaf788b 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -194,6 +194,7 @@ enum {
 	DEVCONF_IOAM6_ID,
 	DEVCONF_IOAM6_ID_WIDE,
 	DEVCONF_NDISC_EVICT_NOCARRIER,
+	DEVCONF_ACCEPT_UNSOLICITED_NA,
 	DEVCONF_MAX
 };
 
-- 
cgit v1.2.3


From c246f9b5fd617fe487f8b6f18851703f468501d6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 18 Apr 2022 09:42:25 +0300
Subject: devlink: add support to create line card and expose to user

Extend the devlink API so the driver is going to be able to create and
destroy linecard instances. There can be multiple line cards per devlink
device. Expose this new type of object over devlink netlink API to the
userspace, with notifications.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b897b80770f6..59c33ed2d3e7 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -131,6 +131,11 @@ enum devlink_command {
 	DEVLINK_CMD_RATE_NEW,
 	DEVLINK_CMD_RATE_DEL,
 
+	DEVLINK_CMD_LINECARD_GET,		/* can dump */
+	DEVLINK_CMD_LINECARD_SET,
+	DEVLINK_CMD_LINECARD_NEW,
+	DEVLINK_CMD_LINECARD_DEL,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -553,6 +558,8 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,	/* u32 */
 
+	DEVLINK_ATTR_LINECARD_INDEX,		/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From fcdc8ce23a309c26a67fc613a741d9b21a248311 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 18 Apr 2022 09:42:26 +0300
Subject: devlink: implement line card provisioning

In order to be able to configure all needed stuff on a port/netdevice
of a line card without the line card being present, introduce line card
provisioning. Basically by setting a type, provisioning process will
start and driver is supposed to create a placeholder for instances
(ports/netdevices) for a line card type.

Allow the user to query the supported line card types over line card
get command. Then implement two netlink command SET to allow user to
set/unset the card type.

On the driver API side, add provision/unprovision ops and supported
types array to be advertised. Upon provision op call, the driver should
take care of creating the instances for the particular line card type.
Introduce provision_set/clear() functions to be called by the driver
once the provisioning/unprovisioning is done on its side. These helpers
are not to be called directly due to the async nature of provisioning.

Example:
$ devlink port # No ports are listed
$ devlink lc
pci/0000:01:00.0:
  lc 1 state unprovisioned
    supported_types:
       16x100G
  lc 2 state unprovisioned
    supported_types:
       16x100G
  lc 3 state unprovisioned
    supported_types:
       16x100G
  lc 4 state unprovisioned
    supported_types:
       16x100G
  lc 5 state unprovisioned
    supported_types:
       16x100G
  lc 6 state unprovisioned
    supported_types:
       16x100G
  lc 7 state unprovisioned
    supported_types:
       16x100G
  lc 8 state unprovisioned
    supported_types:
       16x100G

$ devlink lc set pci/0000:01:00.0 lc 8 type 16x100G
$ devlink lc show pci/0000:01:00.0 lc 8
pci/0000:01:00.0:
  lc 8 state active type 16x100G
    supported_types:
       16x100G
$ devlink port
pci/0000:01:00.0/0: type notset flavour cpu port 0 splittable false
pci/0000:01:00.0/53: type eth netdev enp1s0nl8p1 flavour physical lc 8 port 1 splittable true lanes 4
pci/0000:01:00.0/54: type eth netdev enp1s0nl8p2 flavour physical lc 8 port 2 splittable true lanes 4
pci/0000:01:00.0/55: type eth netdev enp1s0nl8p3 flavour physical lc 8 port 3 splittable true lanes 4
pci/0000:01:00.0/56: type eth netdev enp1s0nl8p4 flavour physical lc 8 port 4 splittable true lanes 4
pci/0000:01:00.0/57: type eth netdev enp1s0nl8p5 flavour physical lc 8 port 5 splittable true lanes 4
pci/0000:01:00.0/58: type eth netdev enp1s0nl8p6 flavour physical lc 8 port 6 splittable true lanes 4
pci/0000:01:00.0/59: type eth netdev enp1s0nl8p7 flavour physical lc 8 port 7 splittable true lanes 4
pci/0000:01:00.0/60: type eth netdev enp1s0nl8p8 flavour physical lc 8 port 8 splittable true lanes 4
pci/0000:01:00.0/61: type eth netdev enp1s0nl8p9 flavour physical lc 8 port 9 splittable true lanes 4
pci/0000:01:00.0/62: type eth netdev enp1s0nl8p10 flavour physical lc 8 port 10 splittable true lanes 4
pci/0000:01:00.0/63: type eth netdev enp1s0nl8p11 flavour physical lc 8 port 11 splittable true lanes 4
pci/0000:01:00.0/64: type eth netdev enp1s0nl8p12 flavour physical lc 8 port 12 splittable true lanes 4
pci/0000:01:00.0/125: type eth netdev enp1s0nl8p13 flavour physical lc 8 port 13 splittable true lanes 4
pci/0000:01:00.0/126: type eth netdev enp1s0nl8p14 flavour physical lc 8 port 14 splittable true lanes 4
pci/0000:01:00.0/127: type eth netdev enp1s0nl8p15 flavour physical lc 8 port 15 splittable true lanes 4
pci/0000:01:00.0/128: type eth netdev enp1s0nl8p16 flavour physical lc 8 port 16 splittable true lanes 4

$ devlink lc set pci/0000:01:00.0 lc 8 notype

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 59c33ed2d3e7..de91e4a0d476 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -343,6 +343,18 @@ enum devlink_reload_limit {
 
 #define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
 
+enum devlink_linecard_state {
+	DEVLINK_LINECARD_STATE_UNSPEC,
+	DEVLINK_LINECARD_STATE_UNPROVISIONED,
+	DEVLINK_LINECARD_STATE_UNPROVISIONING,
+	DEVLINK_LINECARD_STATE_PROVISIONING,
+	DEVLINK_LINECARD_STATE_PROVISIONING_FAILED,
+	DEVLINK_LINECARD_STATE_PROVISIONED,
+
+	__DEVLINK_LINECARD_STATE_MAX,
+	DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -559,6 +571,9 @@ enum devlink_attr {
 	DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,	/* u32 */
 
 	DEVLINK_ATTR_LINECARD_INDEX,		/* u32 */
+	DEVLINK_ATTR_LINECARD_STATE,		/* u8 */
+	DEVLINK_ATTR_LINECARD_TYPE,		/* string */
+	DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES,	/* nested */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
-- 
cgit v1.2.3


From fc9f50d5b366cd9f35bdee22fe3f8d77833cb1d8 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 18 Apr 2022 09:42:27 +0300
Subject: devlink: implement line card active state

Allow driver to mark a line card as active. Expose this state to the
userspace over devlink netlink interface with proper notifications.
'active' state means that line card was plugged in after
being provisioned.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index de91e4a0d476..b3d40a5d72ff 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -350,6 +350,7 @@ enum devlink_linecard_state {
 	DEVLINK_LINECARD_STATE_PROVISIONING,
 	DEVLINK_LINECARD_STATE_PROVISIONING_FAILED,
 	DEVLINK_LINECARD_STATE_PROVISIONED,
+	DEVLINK_LINECARD_STATE_ACTIVE,
 
 	__DEVLINK_LINECARD_STATE_MAX,
 	DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1
-- 
cgit v1.2.3


From 38a6f0865796e26fc38fff4858f681d9ae76fa0f Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Sat, 16 Apr 2022 00:40:46 +0800
Subject: net: sched: support hash selecting tx queue

This patch allows users to pick queue_mapping, range
from A to B. Then we can load balance packets from A
to B tx queue. The range is an unsigned 16bit value
in decimal format.

$ tc filter ... action skbedit queue_mapping skbhash A B

"skbedit queue_mapping QUEUE_MAPPING" (from "man 8 tc-skbedit")
is enhanced with flags: SKBEDIT_F_TXQ_SKBHASH

  +----+      +----+      +----+
  | P1 |      | P2 |      | Pn |
  +----+      +----+      +----+
    |           |           |
    +-----------+-----------+
                |
                | clsact/skbedit
                |      MQ
                v
    +-----------+-----------+
    | q0        | qn        | qm
    v           v           v
  HTB/FQ       FIFO   ...  FIFO

For example:
If P1 sends out packets to different Pods on other host, and
we want distribute flows from qn - qm. Then we can use skb->hash
as hash.

setup commands:
$ NETDEV=eth0
$ ip netns add n1
$ ip link add ipv1 link $NETDEV type ipvlan mode l2
$ ip link set ipv1 netns n1
$ ip netns exec n1 ifconfig ipv1 2.2.2.100/24 up

$ tc qdisc add dev $NETDEV clsact
$ tc filter add dev $NETDEV egress protocol ip prio 1 \
        flower skip_hw src_ip 2.2.2.100 action skbedit queue_mapping skbhash 2 6
$ tc qdisc add dev $NETDEV handle 1: root mq
$ tc qdisc add dev $NETDEV parent 1:1 handle 2: htb
$ tc class add dev $NETDEV parent 2: classid 2:1 htb rate 100kbit
$ tc class add dev $NETDEV parent 2: classid 2:2 htb rate 200kbit
$ tc qdisc add dev $NETDEV parent 1:2 tbf rate 100mbit burst 100mb latency 1
$ tc qdisc add dev $NETDEV parent 1:3 pfifo
$ tc qdisc add dev $NETDEV parent 1:4 pfifo
$ tc qdisc add dev $NETDEV parent 1:5 pfifo
$ tc qdisc add dev $NETDEV parent 1:6 pfifo
$ tc qdisc add dev $NETDEV parent 1:7 pfifo

$ ip netns exec n1 iperf3 -c 2.2.2.1 -i 1 -t 10 -P 10

pick txqueue from 2 - 6:
$ ethtool -S $NETDEV | grep -i tx_queue_[0-9]_bytes
     tx_queue_0_bytes: 42
     tx_queue_1_bytes: 0
     tx_queue_2_bytes: 11442586444
     tx_queue_3_bytes: 7383615334
     tx_queue_4_bytes: 3981365579
     tx_queue_5_bytes: 3983235051
     tx_queue_6_bytes: 6706236461
     tx_queue_7_bytes: 42
     tx_queue_8_bytes: 0
     tx_queue_9_bytes: 0

txqueues 2 - 6 are mapped to classid 1:3 - 1:7
$ tc -s class show dev $NETDEV
...
class mq 1:3 root leaf 8002:
 Sent 11949133672 bytes 7929798 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:4 root leaf 8003:
 Sent 7710449050 bytes 5117279 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:5 root leaf 8004:
 Sent 4157648675 bytes 2758990 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:6 root leaf 8005:
 Sent 4159632195 bytes 2759990 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:7 root leaf 8006:
 Sent 7003169603 bytes 4646912 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
...

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Lobakin <alobakin@pm.me>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Talal Ahmad <talalahmad@google.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Antoine Tenart <atenart@kernel.org>
Cc: Wei Wang <weiwan@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/tc_act/tc_skbedit.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 800e93377218..6cb6101208d0 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -29,6 +29,7 @@
 #define SKBEDIT_F_PTYPE			0x8
 #define SKBEDIT_F_MASK			0x10
 #define SKBEDIT_F_INHERITDSFIELD	0x20
+#define SKBEDIT_F_TXQ_SKBHASH		0x40
 
 struct tc_skbedit {
 	tc_gen;
@@ -45,6 +46,7 @@ enum {
 	TCA_SKBEDIT_PTYPE,
 	TCA_SKBEDIT_MASK,
 	TCA_SKBEDIT_FLAGS,
+	TCA_SKBEDIT_QUEUE_MAPPING_MAX,
 	__TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
-- 
cgit v1.2.3


From b617be33502d2bfefffef71924c7a7ba50264ff6 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch.siach@siklu.com>
Date: Mon, 11 Apr 2022 21:45:27 +0300
Subject: spi: add SPI_RX_CPHA_FLIP mode bit

Some SPI devices latch MOSI bits on one clock phase, but produce valid
MISO bits on the other phase. Add SPI_RX_CPHA_FLIP mode to instruct the
controller driver to flip CPHA for Rx (MISO) only transfers.

Signed-off-by: Baruch Siach <baruch.siach@siklu.com>
Link: https://lore.kernel.org/r/a715ca92713ca02071f33dcca9960a66a03c949a.1649702729.git.baruch@tkos.co.il
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/linux/spi/spi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/spi/spi.h b/include/uapi/linux/spi/spi.h
index 236a85f08ded..9d5f58059703 100644
--- a/include/uapi/linux/spi/spi.h
+++ b/include/uapi/linux/spi/spi.h
@@ -27,6 +27,7 @@
 #define	SPI_TX_OCTAL		_BITUL(13)	/* transmit with 8 wires */
 #define	SPI_RX_OCTAL		_BITUL(14)	/* receive with 8 wires */
 #define	SPI_3WIRE_HIZ		_BITUL(15)	/* high impedance turnaround */
+#define	SPI_RX_CPHA_FLIP	_BITUL(16)	/* flip CPHA on Rx only xfer */
 
 /*
  * All the bits defined above should be covered by SPI_MODE_USER_MASK.
@@ -36,6 +37,6 @@
  * These bits must not overlap. A static assert check should make sure of that.
  * If adding extra bits, make sure to increase the bit index below as well.
  */
-#define SPI_MODE_USER_MASK	(_BITUL(16) - 1)
+#define SPI_MODE_USER_MASK	(_BITUL(17) - 1)
 
 #endif /* _UAPI_SPI_H */
-- 
cgit v1.2.3


From b4000312822615ba2222e368188029e9b725dbf4 Mon Sep 17 00:00:00 2001
From: Boris Sukholitko <boris.sukholitko@broadcom.com>
Date: Tue, 19 Apr 2022 11:14:33 +0300
Subject: net/sched: flower: Add number of vlan tags filter

These are bookkeeping parts of the new num_of_vlans filter.
Defines, dump, load and set are being done here.

Signed-off-by: Boris Sukholitko <boris.sukholitko@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 404f97fb239c..9a2ee1e39fad 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -587,6 +587,8 @@ enum {
 	TCA_FLOWER_KEY_HASH,		/* u32 */
 	TCA_FLOWER_KEY_HASH_MASK,	/* u32 */
 
+	TCA_FLOWER_KEY_NUM_OF_VLANS,    /* u8 */
+
 	__TCA_FLOWER_MAX,
 };
 
-- 
cgit v1.2.3


From 9e4ab6c89109472082616f8d2f6ada7deaffe161 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:19 +0100
Subject: arm64/sme: Implement vector length configuration prctl()s

As for SVE provide a prctl() interface which allows processes to
configure their SME vector length.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-12-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/uapi/linux/prctl.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index e998764f0262..a5e06dcbba13 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -272,6 +272,15 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SCOPE_THREAD_GROUP	1
 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP	2
 
+/* arm64 Scalable Matrix Extension controls */
+/* Flag values must be in sync with SVE versions */
+#define PR_SME_SET_VL			63	/* set task vector length */
+# define PR_SME_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
+#define PR_SME_GET_VL			64	/* get task vector length */
+/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */
+# define PR_SME_VL_LEN_MASK		0xffff
+# define PR_SME_VL_INHERIT		(1 << 17) /* inherit across exec */
+
 #define PR_SET_VMA		0x53564d41
 # define PR_SET_VMA_ANON_NAME		0
 
-- 
cgit v1.2.3


From e12310a0d30f260b26297bc8d7c95769489af038 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:28 +0100
Subject: arm64/sme: Implement ptrace support for streaming mode SVE registers

The streaming mode SVE registers are represented using the same data
structures as for SVE but since the vector lengths supported and in use
may not be the same as SVE we represent them with a new type NT_ARM_SSVE.
Unfortunately we only have a single 16 bit reserved field available in
the header so there is no space to fit the current and maximum vector
length for both standard and streaming SVE mode without redefining the
structure in a way the creates a complicatd and fragile ABI. Since FFR
is not present in streaming mode it is read and written as zero.

Setting NT_ARM_SSVE registers will put the task into streaming mode,
similarly setting NT_ARM_SVE registers will exit it. Reads that do not
correspond to the current mode of the task will return the header with
no register data. For compatibility reasons on write setting no flag for
the register type will be interpreted as setting SVE registers, though
users can provide no register data as an alternative mechanism for doing
so.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-21-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/uapi/linux/elf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 787c657bfae8..a8dc688e1826 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -431,6 +431,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
 #define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
 #define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE	0x40b		/* ARM Streaming SVE registers */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
-- 
cgit v1.2.3


From 776b4a1cf36411e96972455ca72906b722b80ea1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:29 +0100
Subject: arm64/sme: Add ptrace support for ZA

The ZA array can be read and written with the NT_ARM_ZA.  Similarly to
our interface for the SVE vector registers the regset consists of a
header with information on the current vector length followed by an
optional register data payload, represented as for signals as a series
of horizontal vectors from 0 to VL/8 in the endianness independent
format used for vectors.

On get if ZA is enabled then register data will be provided, otherwise
it will be omitted.  On set if register data is provided then ZA is
enabled and initialized using the provided data, otherwise it is
disabled.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-22-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/uapi/linux/elf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index a8dc688e1826..97808f958903 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -432,6 +432,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
 #define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* arm64 ptr auth enabled keys (prctl()) */
 #define NT_ARM_SSVE	0x40b		/* ARM Streaming SVE registers */
+#define NT_ARM_ZA	0x40c		/* ARM SME ZA registers */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
-- 
cgit v1.2.3


From 26f89535a5bb17915a2e1062c3999a2ee797c7b0 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Wed, 13 Apr 2022 22:12:46 -0700
Subject: cxl/mbox: Use type __u32 for mailbox payload sizes

Payload sizes for mailbox commands are expected to be positive values
coming from userspace. The documentation correctly describes these as
always unsigned values. The mailbox and send structures that support
the mailbox commands however, use __s32 types for the payloads.

Replace  __s32 with __u32 in the mailbox and send command structures
and update usages.

Kernel users of the interface already block all negative values and
there is no known ability for userspace to have grown a dependency on
submitting negative values to the kernel. The known user of the IOCTL,
the CXL command line interface (cxl-cli) already enforces positive
size values.

A Smatch warning of a signedness uncovered this issue.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Link: https://lore.kernel.org/r/20220414051246.1244575-1-alison.schofield@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/uapi/linux/cxl_mem.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
index 8d206f27bb6d..c71021a2a9ed 100644
--- a/include/uapi/linux/cxl_mem.h
+++ b/include/uapi/linux/cxl_mem.h
@@ -68,8 +68,8 @@ static const struct {
  * struct cxl_command_info - Command information returned from a query.
  * @id: ID number for the command.
  * @flags: Flags that specify command behavior.
- * @size_in: Expected input size, or -1 if variable length.
- * @size_out: Expected output size, or -1 if variable length.
+ * @size_in: Expected input size, or ~0 if variable length.
+ * @size_out: Expected output size, or ~0 if variable length.
  *
  * Represents a single command that is supported by both the driver and the
  * hardware. This is returned as part of an array from the query ioctl. The
@@ -78,7 +78,7 @@ static const struct {
  *
  *  - @id = 10
  *  - @flags = 0
- *  - @size_in = -1
+ *  - @size_in = ~0
  *  - @size_out = 0
  *
  * See struct cxl_mem_query_commands.
@@ -89,8 +89,8 @@ struct cxl_command_info {
 	__u32 flags;
 #define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0)
 
-	__s32 size_in;
-	__s32 size_out;
+	__u32 size_in;
+	__u32 size_out;
 };
 
 /**
@@ -169,13 +169,13 @@ struct cxl_send_command {
 	__u32 retval;
 
 	struct {
-		__s32 size;
+		__u32 size;
 		__u32 rsvd;
 		__u64 payload;
 	} in;
 
 	struct {
-		__s32 size;
+		__u32 size;
 		__u32 rsvd;
 		__u64 payload;
 	} out;
-- 
cgit v1.2.3


From 567f882a401346779d05a90beb8f21865ebdd398 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 9 Mar 2022 10:55:43 +0000
Subject: media: cec.h: add cec_msg_recv_is_rx/tx_result helpers

These two helper functions return true if the received message
contains the result of a previous non-blocking transmit. Either
the tx_status result (cec_msg_recv_is_tx_result) of the transmit,
or the rx_status result (cec_msg_recv_is_rx_result) of the reply
to the original transmit.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/cec.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h
index de936f5e446d..1d48da926216 100644
--- a/include/uapi/linux/cec.h
+++ b/include/uapi/linux/cec.h
@@ -142,6 +142,26 @@ static inline void cec_msg_set_reply_to(struct cec_msg *msg,
 	msg->reply = msg->timeout = 0;
 }
 
+/**
+ * cec_msg_recv_is_tx_result - return true if this message contains the
+ *			       result of an earlier non-blocking transmit
+ * @msg:	the message structure from CEC_RECEIVE
+ */
+static inline int cec_msg_recv_is_tx_result(const struct cec_msg *msg)
+{
+	return msg->sequence && msg->tx_status && !msg->rx_status;
+}
+
+/**
+ * cec_msg_recv_is_rx_result - return true if this message contains the
+ *			       reply of an earlier non-blocking transmit
+ * @msg:	the message structure from CEC_RECEIVE
+ */
+static inline int cec_msg_recv_is_rx_result(const struct cec_msg *msg)
+{
+	return msg->sequence && !msg->tx_status && msg->rx_status;
+}
+
 /* cec_msg flags field */
 #define CEC_MSG_FL_REPLY_TO_FOLLOWERS	(1 << 0)
 #define CEC_MSG_FL_RAW			(1 << 1)
-- 
cgit v1.2.3


From 4e4dab4bb6029dbee63f12a249ddc44b0124ea63 Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Wed, 2 Mar 2022 22:03:01 +0000
Subject: media: media.h: Add new media link type

To describe in the kernel the connection between devices and their
supporting peripherals (for example, a camera sensor and the vcm
driving the focusing lens for it), add a new type of media link
to introduce the concept of these ancillary links.

Add some elements to the uAPI documentation to explain the new link
type, their purpose and some aspects of their current implementation.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Scally <djrscally@gmail.com>
Reviewed-by: Jean-Michel Hautbois <jeanmichel.hautbois@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/media.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 200fa8462b90..afbae7213d35 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -226,6 +226,7 @@ struct media_pad_desc {
 #define MEDIA_LNK_FL_LINK_TYPE			(0xf << 28)
 #  define MEDIA_LNK_FL_DATA_LINK		(0 << 28)
 #  define MEDIA_LNK_FL_INTERFACE_LINK		(1 << 28)
+#  define MEDIA_LNK_FL_ANCILLARY_LINK		(2 << 28)
 
 struct media_link_desc {
 	struct media_pad_desc source;
-- 
cgit v1.2.3


From 3d22dd432889f2f538b53f36f9f6bcd54825fc22 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 28 Mar 2022 17:01:53 +0100
Subject: media: media.h: remove unneeded <stdint.h> inclusion

Commit b3b7a9f138b7 ("[media] media-device: Use u64 ints for pointers")
added this #include <stdint.h>, presumably in order to use uintptr_t.

Now that it is gone, we can compile this for userspace without <stdint.h>.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/media.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index afbae7213d35..3ddadaea849f 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -20,9 +20,6 @@
 #ifndef __LINUX_MEDIA_H
 #define __LINUX_MEDIA_H
 
-#ifndef __KERNEL__
-#include <stdint.h>
-#endif
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-- 
cgit v1.2.3


From 2308d5aff8d083a44aa02197d2f5687b73d98f82 Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Thu, 3 Mar 2022 15:06:31 +0000
Subject: media: v4l: Add Qualcomm custom compressed pixel formats

Add custom Qualcomm raw compressed pixel formats. They are
used in Qualcomm SoCs to optimize the interconnect bandwidth.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 3768a0a80830..6d465dc443b7 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -746,6 +746,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_INZI     v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */
 #define V4L2_PIX_FMT_CNF4     v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */
 #define V4L2_PIX_FMT_HI240    v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */
+#define V4L2_PIX_FMT_QC08C    v4l2_fourcc('Q', '0', '8', 'C') /* Qualcomm 8-bit compressed */
+#define V4L2_PIX_FMT_QC10C    v4l2_fourcc('Q', '1', '0', 'C') /* Qualcomm 10-bit compressed */
 
 /* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */
 #define V4L2_PIX_FMT_IPU3_SBGGR10	v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */
-- 
cgit v1.2.3


From fcbc4acf8b8dff5fc420a14026bd4ab1798cf465 Mon Sep 17 00:00:00 2001
From: Dikshita Agarwal <quic_dikshita@quicinc.com>
Date: Tue, 19 Apr 2022 06:06:42 +0100
Subject: media: v4l2-ctrls: Add intra-refresh type control

Add a control to set intra-refresh type.

Signed-off-by: Dikshita Agarwal <quic_dikshita@quicinc.com>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index bb40129446d4..dfff69ed88f7 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -449,6 +449,11 @@ enum v4l2_mpeg_video_multi_slice_mode {
 #define V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES		(V4L2_CID_CODEC_BASE+234)
 #define V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR		(V4L2_CID_CODEC_BASE+235)
 #define V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD	(V4L2_CID_CODEC_BASE+236)
+#define V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE	(V4L2_CID_CODEC_BASE+237)
+enum v4l2_mpeg_video_intra_refresh_period_type {
+	V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE_RANDOM	= 0,
+	V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE_CYCLIC	= 1,
+};
 
 /* CIDs for the MPEG-2 Part 2 (H.262) codec */
 #define V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL			(V4L2_CID_CODEC_BASE+270)
-- 
cgit v1.2.3


From 8e29da69feade64ec7fe9e1a2824b967c5183a21 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 18 Apr 2022 10:44:00 -0600
Subject: io_uring: add support for IORING_ASYNC_CANCEL_ALL

The current cancelation will lookup and cancel the first request it
finds based on the key passed in. Add a flag that allows to cancel any
request that matches they key. It completes with the number of requests
found and canceled, or res < 0 if an error occured.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220418164402.75259-4-axboe@kernel.dk
---
 include/uapi/linux/io_uring.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1845cf7c80ba..476e58a2837f 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -187,6 +187,13 @@ enum {
 #define IORING_POLL_UPDATE_EVENTS	(1U << 1)
 #define IORING_POLL_UPDATE_USER_DATA	(1U << 2)
 
+/*
+ * ASYNC_CANCEL flags.
+ *
+ * IORING_ASYNC_CANCEL_ALL	Cancel all requests that match the given key
+ */
+#define IORING_ASYNC_CANCEL_ALL	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
cgit v1.2.3


From 4bf94615b8886305199ed5755cb72fea88258d15 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 18 Apr 2022 10:44:01 -0600
Subject: io_uring: allow IORING_OP_ASYNC_CANCEL with 'fd' key

Currently sqe->addr must contain the user_data of the request being
canceled. Introduce the IORING_ASYNC_CANCEL_FD flag, which tells the
kernel that we're keying off the file fd instead for cancelation. This
allows canceling any request that a) uses a file, and b) was assigned the
file based on the value being passed in.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220418164402.75259-5-axboe@kernel.dk
---
 include/uapi/linux/io_uring.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 476e58a2837f..cc7fe82a1798 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -191,8 +191,11 @@ enum {
  * ASYNC_CANCEL flags.
  *
  * IORING_ASYNC_CANCEL_ALL	Cancel all requests that match the given key
+ * IORING_ASYNC_CANCEL_FD	Key off 'fd' for cancelation rather than the
+ *				request 'user_data'
  */
 #define IORING_ASYNC_CANCEL_ALL	(1U << 0)
+#define IORING_ASYNC_CANCEL_FD	(1U << 1)
 
 /*
  * IO completion data structure (Completion Queue Entry)
-- 
cgit v1.2.3


From 970f256edb8c1259c8ed48d52b38215135396126 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 18 Apr 2022 10:44:02 -0600
Subject: io_uring: add support for IORING_ASYNC_CANCEL_ANY

Rather than match on a specific key, be it user_data or file, allow
canceling any request that we can lookup. Works like
IORING_ASYNC_CANCEL_ALL in that it cancels multiple requests, but it
doesn't key off user_data or the file.

Can't be set with IORING_ASYNC_CANCEL_FD, as that's a key selector.
Only one may be used at the time.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220418164402.75259-6-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index cc7fe82a1798..980d82eb196e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -193,9 +193,11 @@ enum {
  * IORING_ASYNC_CANCEL_ALL	Cancel all requests that match the given key
  * IORING_ASYNC_CANCEL_FD	Key off 'fd' for cancelation rather than the
  *				request 'user_data'
+ * IORING_ASYNC_CANCEL_ANY	Match any request
  */
 #define IORING_ASYNC_CANCEL_ALL	(1U << 0)
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
+#define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
 /*
  * IO completion data structure (Completion Queue Entry)
-- 
cgit v1.2.3


From e9621e2bec80fe63f677a759066a5089b292f43a Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@fb.com>
Date: Wed, 23 Mar 2022 08:44:19 -0700
Subject: io_uring: add fsetxattr and setxattr support

This adds support to io_uring for the fsetxattr and setxattr API.

Signed-off-by: Stefan Roesch <shr@fb.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20220323154420.3301504-4-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 980d82eb196e..864bd6a4d4ff 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -45,6 +45,7 @@ struct io_uring_sqe {
 		__u32		rename_flags;
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
+		__u32		xattr_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -60,7 +61,8 @@ struct io_uring_sqe {
 		__s32	splice_fd_in;
 		__u32	file_index;
 	};
-	__u64	__pad2[2];
+	__u64	addr3;
+	__u64	__pad2[1];
 };
 
 enum {
@@ -145,6 +147,8 @@ enum {
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
 	IORING_OP_MSG_RING,
+	IORING_OP_FSETXATTR,
+	IORING_OP_SETXATTR,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From a56834e0fafe0adf7f22a28a5dbec3e8c3031a0e Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@fb.com>
Date: Wed, 23 Mar 2022 08:44:20 -0700
Subject: io_uring: add fgetxattr and getxattr support

This adds support to io_uring for the fgetxattr and getxattr API.

Signed-off-by: Stefan Roesch <shr@fb.com>
Acked-by: Christian Brauner <brauner@kernel.org>
Link: https://lore.kernel.org/r/20220323154420.3301504-5-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 864bd6a4d4ff..8ca1d9ae56d6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -149,6 +149,8 @@ enum {
 	IORING_OP_MSG_RING,
 	IORING_OP_FSETXATTR,
 	IORING_OP_SETXATTR,
+	IORING_OP_FGETXATTR,
+	IORING_OP_GETXATTR,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From 1374e08e2d44863c931910797852589803997668 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 12 Apr 2022 14:22:40 -0600
Subject: io_uring: add socket(2) support

Supports both regular socket(2) where a normal file descriptor is
instantiated when called, or direct descriptors.

Link: https://lore.kernel.org/r/20220412202240.234207-3-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8ca1d9ae56d6..5fb52bf32435 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -151,6 +151,7 @@ enum {
 	IORING_OP_SETXATTR,
 	IORING_OP_FGETXATTR,
 	IORING_OP_GETXATTR,
+	IORING_OP_SOCKET,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From 8d92e4fbcf0fb7ecb24223b7b1ce95b9beb4dfa2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 25 Apr 2022 06:44:21 +0300
Subject: devlink: introduce line card devices support

Line card can contain one or more devices that makes sense to make
visible to the user. For example, this can be a gearbox with
flash memory, which could be updated.

Provide the driver possibility to attach such devices to a line card
and expose those to user.

Example:
$ devlink lc show pci/0000:01:00.0 lc 8
pci/0000:01:00.0:
  lc 8 state active type 16x100G
    supported_types:
      16x100G
    devices:
      device 0
      device 1
      device 2
      device 3

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b3d40a5d72ff..cd578645f94f 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -575,6 +575,9 @@ enum devlink_attr {
 	DEVLINK_ATTR_LINECARD_STATE,		/* u8 */
 	DEVLINK_ATTR_LINECARD_TYPE,		/* string */
 	DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES,	/* nested */
+	DEVLINK_ATTR_LINECARD_DEVICE_LIST,	/* nested */
+	DEVLINK_ATTR_LINECARD_DEVICE,		/* nested */
+	DEVLINK_ATTR_LINECARD_DEVICE_INDEX,	/* u32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
-- 
cgit v1.2.3


From 276910aecc6a4076f5fbfd8160ff70695d6c1eb5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 25 Apr 2022 06:44:22 +0300
Subject: devlink: introduce line card info get message

Allow the driver to provide per line card info get op to fill-up info,
similar to the "devlink dev info".

Example:

$ devlink lc info pci/0000:01:00.0 lc 8
pci/0000:01:00.0:
  lc 8
    versions:
        fixed:
          hw.revision 0
        running:
          ini.version 4

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index cd578645f94f..fb8c3864457f 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -136,6 +136,8 @@ enum devlink_command {
 	DEVLINK_CMD_LINECARD_NEW,
 	DEVLINK_CMD_LINECARD_DEL,
 
+	DEVLINK_CMD_LINECARD_INFO_GET,		/* can dump */
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
-- 
cgit v1.2.3


From e8e7fbb6a39cd6761c843d97851eb40c5885e922 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Tue, 12 Apr 2022 11:49:43 +0200
Subject: PCI: Add PCI_EXP_SLTCTL_ASPL_DISABLE macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add macro defining Auto Slot Power Limit Disable bit in Slot Control
Register.

Link: https://lore.kernel.org/r/20220412094946.27069-2-pali@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index bee1a9ed6e66..108f8523fa04 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -616,6 +616,7 @@
 #define  PCI_EXP_SLTCTL_PWR_OFF        0x0400 /* Power Off */
 #define  PCI_EXP_SLTCTL_EIC	0x0800	/* Electromechanical Interlock Control */
 #define  PCI_EXP_SLTCTL_DLLSCE	0x1000	/* Data Link Layer State Changed Enable */
+#define  PCI_EXP_SLTCTL_ASPL_DISABLE	0x2000 /* Auto Slot Power Limit Disable */
 #define  PCI_EXP_SLTCTL_IBPD_DISABLE	0x4000 /* In-band PD disable */
 #define PCI_EXP_SLTSTA		0x1a	/* Slot Status */
 #define  PCI_EXP_SLTSTA_ABP	0x0001	/* Attention Button Pressed */
-- 
cgit v1.2.3


From 7d5e005d982527e4029b0139823d179986e34cdc Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 22 Apr 2022 15:03:25 +0300
Subject: fanotify: implement "evictable" inode marks

When an inode mark is created with flag FAN_MARK_EVICTABLE, it will not
pin the marked inode to inode cache, so when inode is evicted from cache
due to memory pressure, the mark will be lost.

When an inode mark with flag FAN_MARK_EVICATBLE is updated without using
this flag, the marked inode is pinned to inode cache.

When an inode mark is updated with flag FAN_MARK_EVICTABLE but an
existing mark already has the inode pinned, the mark update fails with
error EEXIST.

Evictable inode marks can be used to setup inode marks with ignored mask
to suppress events from uninteresting files or directories in a lazy
manner, upon receiving the first event, without having to iterate all
the uninteresting files or directories before hand.

The evictbale inode mark feature allows performing this lazy marks setup
without exhausting the system memory with pinned inodes.

This change does not enable the feature yet.

Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiRDpuS=2uA6+ZUM7yG9vVU-u212tkunBmSnP_u=mkv=Q@mail.gmail.com/
Link: https://lore.kernel.org/r/20220422120327.3459282-15-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/fanotify.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index e8ac38cc2fd6..f1f89132d60e 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -82,6 +82,7 @@
 #define FAN_MARK_IGNORED_SURV_MODIFY	0x00000040
 #define FAN_MARK_FLUSH		0x00000080
 /* FAN_MARK_FILESYSTEM is	0x00000100 */
+#define FAN_MARK_EVICTABLE	0x00000200
 
 /* These are NOT bitwise flags.  Both bits can be used togther.  */
 #define FAN_MARK_INODE		0x00000000
-- 
cgit v1.2.3


From c0a5a21c25f37c9fd7b36072f9968cdff1e4aa13 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 25 Apr 2022 03:18:51 +0530
Subject: bpf: Allow storing referenced kptr in map

Extending the code in previous commits, introduce referenced kptr
support, which needs to be tagged using 'kptr_ref' tag instead. Unlike
unreferenced kptr, referenced kptr have a lot more restrictions. In
addition to the type matching, only a newly introduced bpf_kptr_xchg
helper is allowed to modify the map value at that offset. This transfers
the referenced pointer being stored into the map, releasing the
references state for the program, and returning the old value and
creating new reference state for the returned pointer.

Similar to unreferenced pointer case, return value for this case will
also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned pointer
must either be eventually released by calling the corresponding release
function, otherwise it must be transferred into another map.

It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear
the value, and obtain the old value if any.

BPF_LDX, BPF_STX, and BPF_ST cannot access referenced kptr. A future
commit will permit using BPF_LDX for such pointers, but attempt at
making it safe, since the lifetime of object won't be guaranteed.

There are valid reasons to enforce the restriction of permitting only
bpf_kptr_xchg to operate on referenced kptr. The pointer value must be
consistent in face of concurrent modification, and any prior values
contained in the map must also be released before a new one is moved
into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg
returns the old value, which the verifier would require the user to
either free or move into another map, and releases the reference held
for the pointer being moved in.

In the future, direct BPF_XCHG instruction may also be permitted to work
like bpf_kptr_xchg helper.

Note that process_kptr_func doesn't have to call
check_helper_mem_access, since we already disallow rdonly/wronly flags
for map, which is what check_map_access_type checks, and we already
ensure the PTR_TO_MAP_VALUE refers to kptr by obtaining its off_desc,
so check_map_access is also not required.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-4-memxor@gmail.com
---
 include/uapi/linux/bpf.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..444fe6f1cf35 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5143,6 +5143,17 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ *	Description
+ *		Exchange kptr at pointer *map_value* with *ptr*, and return the
+ *		old value. *ptr* can be NULL, otherwise it must be a referenced
+ *		pointer which will be released when this helper is called.
+ *	Return
+ *		The old value of kptr (which can be NULL). The returned pointer
+ *		if not NULL, is a reference which must be released using its
+ *		corresponding release function, or moved into a BPF map before
+ *		program exit.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5339,6 +5350,7 @@ union bpf_attr {
 	FN(copy_from_user_task),	\
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
+	FN(kptr_xchg),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From d8fc1c7c4c9b705ce5f5bba772ad66a0137c685d Mon Sep 17 00:00:00 2001
From: Andrew Davis <afd@ti.com>
Date: Mon, 25 Apr 2022 09:16:17 -0500
Subject: tee: remove flags TEE_IOCTL_SHM_MAPPED and TEE_IOCTL_SHM_DMA_BUF

These look to be leftover from an early edition of this driver. Userspace
does not need this information. Checking all users of this that I have
access to I have verified no one is using them.

They leak internal use flags out to userspace. Even more they are not
correct anymore after a45ea4efa358. Lets drop these flags before
someone does try to use them for something and they become ABI.

Signed-off-by: Andrew Davis <afd@ti.com>
Acked-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/uapi/linux/tee.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
index 25a6c534beb1..23e57164693c 100644
--- a/include/uapi/linux/tee.h
+++ b/include/uapi/linux/tee.h
@@ -42,10 +42,6 @@
 #define TEE_IOC_MAGIC	0xa4
 #define TEE_IOC_BASE	0
 
-/* Flags relating to shared memory */
-#define TEE_IOCTL_SHM_MAPPED	0x1	/* memory mapped in normal world */
-#define TEE_IOCTL_SHM_DMA_BUF	0x2	/* dma-buf handle on shared memory */
-
 #define TEE_MAX_ARG_SIZE	1024
 
 #define TEE_GEN_CAP_GP		(1 << 0)/* GlobalPlatform compliant TEE */
-- 
cgit v1.2.3


From cc51eaa8b530bf070e76847a717adcbf603469b7 Mon Sep 17 00:00:00 2001
From: Dylan Yudaken <dylany@fb.com>
Date: Tue, 26 Apr 2022 01:29:04 -0700
Subject: io_uring: add type to op enum

It is useful to have a type enum for opcodes, to allow the compiler to
assert that every value is used in a switch statement.

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Link: https://lore.kernel.org/r/20220426082907.3600028-2-dylany@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5fb52bf32435..49d1f3994f8d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -105,7 +105,7 @@ enum {
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
 #define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
 
-enum {
+enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
 	IORING_OP_WRITEV,
-- 
cgit v1.2.3


From 052e1f01bfae8be6f31b61ed3a2356edfca855dc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 26 Apr 2022 10:54:33 -0700
Subject: net: atm: remove support for ZeitNet ZN122x ATM devices

This driver received nothing but automated fixes in the last 15 years.
Since it's using virt_to_bus it's unlikely to be used on any modern
platform.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/atm_zatm.h | 47 -------------------------------------------
 1 file changed, 47 deletions(-)
 delete mode 100644 include/uapi/linux/atm_zatm.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
deleted file mode 100644
index 5135027b93c1..000000000000
--- a/include/uapi/linux/atm_zatm.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* atm_zatm.h - Driver-specific declarations of the ZATM driver (for use by
-		driver-specific utilities) */
-
-/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */
-
-
-#ifndef LINUX_ATM_ZATM_H
-#define LINUX_ATM_ZATM_H
-
-/*
- * Note: non-kernel programs including this file must also include
- * sys/types.h for struct timeval
- */
-
-#include <linux/atmapi.h>
-#include <linux/atmioc.h>
-
-#define ZATM_GETPOOL	_IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
-						/* get pool statistics */
-#define ZATM_GETPOOLZ	_IOW('a',ATMIOC_SARPRV+2,struct atmif_sioc)
-						/* get statistics and zero */
-#define ZATM_SETPOOL	_IOW('a',ATMIOC_SARPRV+3,struct atmif_sioc)
-						/* set pool parameters */
-
-struct zatm_pool_info {
-	int ref_count;			/* free buffer pool usage counters */
-	int low_water,high_water;	/* refill parameters */
-	int rqa_count,rqu_count;	/* queue condition counters */
-	int offset,next_off;		/* alignment optimizations: offset */
-	int next_cnt,next_thres;	/* repetition counter and threshold */
-};
-
-struct zatm_pool_req {
-	int pool_num;			/* pool number */
-	struct zatm_pool_info info;	/* actual information */
-};
-
-#define ZATM_OAM_POOL		0	/* free buffer pool for OAM cells */
-#define ZATM_AAL0_POOL		1	/* free buffer pool for AAL0 cells */
-#define ZATM_AAL5_POOL_BASE	2	/* first AAL5 free buffer pool */
-#define ZATM_LAST_POOL	ZATM_AAL5_POOL_BASE+10 /* max. 64 kB */
-
-#define ZATM_TIMER_HISTORY_SIZE	16	/* number of timer adjustments to
-					   record; must be 2^n */
-
-#endif
-- 
cgit v1.2.3


From c35fe2a68f29a0bda15ae994154cacaae5f69791 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 25 Apr 2022 16:18:33 +0100
Subject: elf: Fix the arm64 MTE ELF segment name and value

Unfortunately, the name/value choice for the MTE ELF segment type
(PT_ARM_MEMTAG_MTE) was pretty poor: LOPROC+1 is already in use by
PT_AARCH64_UNWIND, as defined in the AArch64 ELF ABI
(https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst).

Update the ELF segment type value to LOPROC+2 and also change the define
to PT_AARCH64_MEMTAG_MTE to match the AArch64 ELF ABI namespace. The
AArch64 ELF ABI document is updating accordingly (segment type not
previously mentioned in the document).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Fixes: 761b9b366cec ("elf: Introduce the ARM MTE ELF segment type")
Cc: Will Deacon <will@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Luis Machado <luis.machado@arm.com>
Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>
Link: https://lore.kernel.org/r/20220425151833.2603830-1-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/uapi/linux/elf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 787c657bfae8..7ce993e6786c 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -42,7 +42,7 @@ typedef __s64	Elf64_Sxword;
 
 
 /* ARM MTE memory tag segment type */
-#define PT_ARM_MEMTAG_MTE	(PT_LOPROC + 0x1)
+#define PT_AARCH64_MEMTAG_MTE	(PT_LOPROC + 0x2)
 
 /*
  * Extended Numbering
-- 
cgit v1.2.3


From d495f942f40aa412f8d4d65951152648cfa09903 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 22 Apr 2022 12:30:13 +0200
Subject: KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT

When KVM_EXIT_SYSTEM_EVENT was introduced, it included a flags
member that at the time was unused.  Unfortunately this extensibility
mechanism has several issues:

- x86 is not writing the member, so it would not be possible to use it
  on x86 except for new events

- the member is not aligned to 64 bits, so the definition of the
  uAPI struct is incorrect for 32- on 64-bit userspace.  This is a
  problem for RISC-V, which supports CONFIG_KVM_COMPAT, but fortunately
  usage of flags was only introduced in 5.18.

Since padding has to be introduced, place a new field in there
that tells if the flags field is valid.  To allow further extensibility,
in fact, change flags to an array of 16 values, and store how many
of the values are valid.  The availability of the new ndata field
is tied to a system capability; all architectures are changed to
fill in the field.

To avoid breaking compilation of userspace that was using the flags
field, provide a userspace-only union to overlap flags with data[0].
The new field is placed at the same offset for both 32- and 64-bit
userspace.

Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Peter Gonda <pgonda@google.com>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Message-Id: <20220422103013.34832-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 91a6fe4e02c0..6a184d260c7f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -445,7 +445,13 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
 			__u32 type;
-			__u64 flags;
+			__u32 ndata;
+			union {
+#ifndef __KERNEL__
+				__u64 flags;
+#endif
+				__u64 data[16];
+			};
 		} system_event;
 		/* KVM_EXIT_S390_STSI */
 		struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 0e0af57e0e91b304f36b7d1dba859e3c04094273 Mon Sep 17 00:00:00 2001
From: "Dr. Thomas Orgis" <thomas.orgis@uni-hamburg.de>
Date: Fri, 29 Apr 2022 14:38:03 -0700
Subject: taskstats: version 12 with thread group and exe info

The task exit struct needs some crucial information to be able to provide
an enhanced version of process and thread accounting.  This change
provides:

1. ac_tgid in additon to ac_pid
2. thread group execution walltime in ac_tgetime
3. flag AGROUP in ac_flag to indicate the last task
   in a thread group / process
4. device ID and inode of task's /proc/self/exe in
   ac_exe_dev and ac_exe_inode
5. tools/accounting/procacct as demonstrator

When a task exits, taskstats are reported to userspace including the
task's pid and ppid, but without the id of the thread group this task is
part of.  Without the tgid, the stats of single tasks cannot be correlated
to each other as a thread group (process).

The taskstats documentation suggests that on process exit a data set
consisting of accumulated stats for the whole group is produced.  But such
an additional set of stats is only produced for actually multithreaded
processes, not groups that had only one thread, and also those stats only
contain data about delay accounting and not the more basic information
about CPU and memory resource usage.  Adding the AGROUP flag to be set
when the last task of a group exited enables determination of process end
also for single-threaded processes.

My applicaton basically does enhanced process accounting with summed
cputime, biggest maxrss, tasks per process.  The data is not available
with the traditional BSD process accounting (which is not designed to be
extensible) and the taskstats interface allows more efficient on-the-fly
grouping and summing of the stats, anyway, without intermediate disk
writes.

Furthermore, I do carry statistics on which exact program binary is used
how often with associated resources, getting a picture on how important
which parts of a collection of installed scientific software in different
versions are, and how well they put load on the machine.  This is enabled
by providing information on /proc/self/exe for each task.  I assume the
two 64-bit fields for device ID and inode are more appropriate than the
possibly large resolved path to keep the data volume down.

Add the tgid to the stats to complete task identification, the flag AGROUP
to mark the last task of a group, the group wallclock time, and
inode-based identification of the associated executable file.

Add tools/accounting/procacct.c as a simplified fork of getdelays.c to
demonstrate process and thread accounting.

[thomas.orgis@uni-hamburg.de: fix version number in comment]
  Link: https://lkml.kernel.org/r/20220405003601.7a5f6008@plasteblaster
Link: https://lkml.kernel.org/r/20220331004106.64e5616b@plasteblaster
Signed-off-by: Dr. Thomas Orgis <thomas.orgis@uni-hamburg.de>
Reviewed-by: Ismael Luceno <ismael@iodev.co.uk>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/acct.h      |  3 ++-
 include/uapi/linux/taskstats.h | 24 +++++++++++++++++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/acct.h b/include/uapi/linux/acct.h
index 985b89068591..0e591152aa8a 100644
--- a/include/uapi/linux/acct.h
+++ b/include/uapi/linux/acct.h
@@ -103,12 +103,13 @@ struct acct_v3
 /*
  *  accounting flags
  */
-				/* bit set when the process ... */
+				/* bit set when the process/task ... */
 #define AFORK		0x01	/* ... executed fork, but did not exec */
 #define ASU		0x02	/* ... used super-user privileges */
 #define ACOMPAT		0x04	/* ... used compatibility mode (VAX only not used) */
 #define ACORE		0x08	/* ... dumped core */
 #define AXSIG		0x10	/* ... was killed by a signal */
+#define AGROUP		0x20	/* ... was the last task of the process (task group) */
 
 #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
 #define ACCT_BYTEORDER	0x80	/* accounting file is big endian */
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index 12327d32378f..736154171489 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION	11
+#define TASKSTATS_VERSION	12
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -48,7 +48,8 @@ struct taskstats {
 	__u32	ac_exitcode;		/* Exit status */
 
 	/* The accounting flags of a task as defined in <linux/acct.h>
-	 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
+	 * Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP.
+	 * (AGROUP since version 12).
 	 */
 	__u8	ac_flag;		/* Record flags */
 	__u8	ac_nice;		/* task_nice */
@@ -173,9 +174,26 @@ struct taskstats {
 	/* v10: 64-bit btime to avoid overflow */
 	__u64	ac_btime64;		/* 64-bit begin time */
 
-	/* Delay waiting for memory compact */
+	/* v11: Delay waiting for memory compact */
 	__u64	compact_count;
 	__u64	compact_delay_total;
+
+	/* v12 begin */
+	__u32   ac_tgid;	/* thread group ID */
+	/* Thread group walltime up to now. This is total process walltime if
+	 * AGROUP flag is set.
+	 */
+	__u64	ac_tgetime __attribute__((aligned(8)));
+	/* Lightweight information to identify process binary files.
+	 * This leaves userspace to match this to a file system path, using
+	 * MAJOR() and MINOR() macros to identify a device and mount point,
+	 * the inode to identify the executable file. This is /proc/self/exe
+	 * at the end, so matching the most recent exec(). Values are zero
+	 * for kernel threads.
+	 */
+	__u64   ac_exe_dev;     /* program binary device ID */
+	__u64   ac_exe_inode;   /* program binary inode number */
+	/* v12 end */
 };
 
 
-- 
cgit v1.2.3


From f548a12efd5ab97e6b1fb332e5634ce44b3d9328 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 26 Apr 2022 17:39:50 -0600
Subject: io_uring: return hint on whether more data is available after receive

For now just use a CQE flag for this, with big CQE support we could
return the actual number of bytes left.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 49d1f3994f8d..92d1799892b2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -220,9 +220,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
 #define IORING_CQE_F_MORE		(1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,
-- 
cgit v1.2.3


From e1169f06d5bbdbc2b22ae4e3083a4bf75ae5ecee Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 25 Apr 2022 19:49:03 -0600
Subject: io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_COOP_TASKRUN is used

If this is set, io_uring will never use an IPI to deliver a task_work
notification. This can be used in the common case where a single task or
thread communicates with the ring, and doesn't rely on
io_uring_cqe_peek().

This provides a noticeable win in performance, both from eliminating
the IPI itself, but also from avoiding interrupting the submitting
task unnecessarily.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20220426014904.60384-6-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 980d82eb196e..a84f29d657c3 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -102,6 +102,14 @@ enum {
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
 #define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN	(1U << 8)
 
 enum {
 	IORING_OP_NOP,
-- 
cgit v1.2.3


From ef060ea9e4fd3b763e7060a3af0a258d2d5d7c0d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 25 Apr 2022 19:49:04 -0600
Subject: io_uring: add IORING_SETUP_TASKRUN_FLAG

If IORING_SETUP_COOP_TASKRUN is set to use cooperative scheduling for
running task_work, then IORING_SETUP_TASKRUN_FLAG can be set so the
application can tell if task_work is pending in the kernel for this
ring. This allows use cases like io_uring_peek_cqe() to still function
appropriately, or for the task to know when it would be useful to
call io_uring_wait_cqe() to run pending events.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20220426014904.60384-7-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index a84f29d657c3..fad63564678a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -110,6 +110,12 @@ enum {
  * a task running in userspace, and saves an IPI.
  */
 #define IORING_SETUP_COOP_TASKRUN	(1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
 
 enum {
 	IORING_OP_NOP,
@@ -256,6 +262,7 @@ struct io_sqring_offsets {
  */
 #define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
 #define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN	(1U << 2) /* task should enter the kernel */
 
 struct io_cqring_offsets {
 	__u32 head;
-- 
cgit v1.2.3


From 3254e0b9eb5649ffaa48717ebc9c593adc4ee6a9 Mon Sep 17 00:00:00 2001
From: Alexandru Tachici <alexandru.tachici@analog.com>
Date: Fri, 29 Apr 2022 18:34:31 +0300
Subject: ethtool: Add 10base-T1L link mode entry

Add entry for the 10base-T1L full duplex mode.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7bc4b8def12c..e0f0ee9bc89e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1691,6 +1691,7 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT	 = 89,
 	ETHTOOL_LINK_MODE_100baseFX_Half_BIT		 = 90,
 	ETHTOOL_LINK_MODE_100baseFX_Full_BIT		 = 91,
+	ETHTOOL_LINK_MODE_10baseT1L_Full_BIT		 = 92,
 	/* must be last entry */
 	__ETHTOOL_LINK_MODE_MASK_NBITS
 };
-- 
cgit v1.2.3


From 909b4f2bf764a903e9183111368f1509f9b40e6d Mon Sep 17 00:00:00 2001
From: Alexandru Tachici <alexandru.tachici@analog.com>
Date: Fri, 29 Apr 2022 18:34:32 +0300
Subject: net: phy: Add 10-BaseT1L registers

The 802.3gc specification defines the 10-BaseT1L link
mode for ethernet trafic on twisted wire pair.

PMA status register can be used to detect if the phy supports
2.4 V TX level and PCS control register can be used to
enable/disable PCS level loopback.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mdio.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index c54e6eae5366..0b2eba36dd7c 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -67,6 +67,9 @@
 #define MDIO_PCS_10GBRT_STAT2	33	/* 10GBASE-R/-T PCS status 2 */
 #define MDIO_AN_10GBT_CTRL	32	/* 10GBASE-T auto-negotiation control */
 #define MDIO_AN_10GBT_STAT	33	/* 10GBASE-T auto-negotiation status */
+#define MDIO_B10L_PMA_CTRL	2294	/* 10BASE-T1L PMA control */
+#define MDIO_PMA_10T1L_STAT	2295	/* 10BASE-T1L PMA status */
+#define MDIO_PCS_10T1L_CTRL	2278	/* 10BASE-T1L PCS control */
 
 /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
 #define MDIO_PMA_LASI_RXCTRL	0x9000	/* RX_ALARM control */
@@ -268,6 +271,28 @@
 #define MDIO_AN_10GBT_STAT_MS		0x4000	/* Master/slave config */
 #define MDIO_AN_10GBT_STAT_MSFLT	0x8000	/* Master/slave config fault */
 
+/* 10BASE-T1L PMA control */
+#define MDIO_PMA_10T1L_CTRL_LB_EN	0x0001	/* Enable loopback mode */
+#define MDIO_PMA_10T1L_CTRL_EEE_EN	0x0400	/* Enable EEE mode */
+#define MDIO_PMA_10T1L_CTRL_LOW_POWER	0x0800	/* Low-power mode */
+#define MDIO_PMA_10T1L_CTRL_2V4_EN	0x1000	/* Enable 2.4 Vpp operating mode */
+#define MDIO_PMA_10T1L_CTRL_TX_DIS	0x4000	/* Transmit disable */
+#define MDIO_PMA_10T1L_CTRL_PMA_RST	0x8000	/* MA reset */
+
+/* 10BASE-T1L PMA status register. */
+#define MDIO_PMA_10T1L_STAT_LINK	0x0001	/* PMA receive link up */
+#define MDIO_PMA_10T1L_STAT_FAULT	0x0002	/* Fault condition detected */
+#define MDIO_PMA_10T1L_STAT_POLARITY	0x0004	/* Receive polarity is reversed */
+#define MDIO_PMA_10T1L_STAT_RECV_FAULT	0x0200	/* Able to detect fault on receive path */
+#define MDIO_PMA_10T1L_STAT_EEE		0x0400	/* PHY has EEE ability */
+#define MDIO_PMA_10T1L_STAT_LOW_POWER	0x0800	/* PMA has low-power ability */
+#define MDIO_PMA_10T1L_STAT_2V4_ABLE	0x1000	/* PHY has 2.4 Vpp operating mode ability */
+#define MDIO_PMA_10T1L_STAT_LB_ABLE	0x2000	/* PHY has loopback ability */
+
+/* 10BASE-T1L PCS control register. */
+#define MDIO_PCS_10T1L_CTRL_LB		0x4000	/* Enable PCS level loopback mode */
+#define MDIO_PCS_10T1L_CTRL_RESET	0x8000	/* PCS reset */
+
 /* EEE Supported/Advertisement/LP Advertisement registers.
  *
  * EEE capability Register (3.20), Advertisement (7.60) and
-- 
cgit v1.2.3


From 1b020e448e0fb67bcb04ee0f778d413045f965d3 Mon Sep 17 00:00:00 2001
From: Alexandru Tachici <alexandru.tachici@analog.com>
Date: Fri, 29 Apr 2022 18:34:33 +0300
Subject: net: phy: Add BaseT1 auto-negotiation registers

Added BASE-T1 AN advertisement register (Registers 7.514, 7.515, and
7.516) and BASE-T1 AN LP Base Page ability register (Registers 7.517,
7.518, and 7.519).

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mdio.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index 0b2eba36dd7c..fa3515257f54 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -70,6 +70,14 @@
 #define MDIO_B10L_PMA_CTRL	2294	/* 10BASE-T1L PMA control */
 #define MDIO_PMA_10T1L_STAT	2295	/* 10BASE-T1L PMA status */
 #define MDIO_PCS_10T1L_CTRL	2278	/* 10BASE-T1L PCS control */
+#define MDIO_AN_T1_CTRL		512	/* BASE-T1 AN control */
+#define MDIO_AN_T1_STAT		513	/* BASE-T1 AN status */
+#define MDIO_AN_T1_ADV_L	514	/* BASE-T1 AN advertisement register [15:0] */
+#define MDIO_AN_T1_ADV_M	515	/* BASE-T1 AN advertisement register [31:16] */
+#define MDIO_AN_T1_ADV_H	516	/* BASE-T1 AN advertisement register [47:32] */
+#define MDIO_AN_T1_LP_L		517	/* BASE-T1 AN LP Base Page ability register [15:0] */
+#define MDIO_AN_T1_LP_M		518	/* BASE-T1 AN LP Base Page ability register [31:16] */
+#define MDIO_AN_T1_LP_H		519	/* BASE-T1 AN LP Base Page ability register [47:32] */
 
 /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
 #define MDIO_PMA_LASI_RXCTRL	0x9000	/* RX_ALARM control */
@@ -293,6 +301,38 @@
 #define MDIO_PCS_10T1L_CTRL_LB		0x4000	/* Enable PCS level loopback mode */
 #define MDIO_PCS_10T1L_CTRL_RESET	0x8000	/* PCS reset */
 
+/* BASE-T1 auto-negotiation advertisement register [15:0] */
+#define MDIO_AN_T1_ADV_L_PAUSE_CAP	ADVERTISE_PAUSE_CAP
+#define MDIO_AN_T1_ADV_L_PAUSE_ASYM	ADVERTISE_PAUSE_ASYM
+#define MDIO_AN_T1_ADV_L_FORCE_MS	0x1000	/* Force Master/slave Configuration */
+#define MDIO_AN_T1_ADV_L_REMOTE_FAULT	ADVERTISE_RFAULT
+#define MDIO_AN_T1_ADV_L_ACK		ADVERTISE_LPACK
+#define MDIO_AN_T1_ADV_L_NEXT_PAGE_REQ	ADVERTISE_NPAGE
+
+/* BASE-T1 auto-negotiation advertisement register [31:16] */
+#define MDIO_AN_T1_ADV_M_B10L		0x4000	/* device is compatible with 10BASE-T1L */
+#define MDIO_AN_T1_ADV_M_MST		0x0010	/* advertise master preference */
+
+/* BASE-T1 auto-negotiation advertisement register [47:32] */
+#define MDIO_AN_T1_ADV_H_10L_TX_HI_REQ	0x1000	/* 10BASE-T1L High Level Transmit Request */
+#define MDIO_AN_T1_ADV_H_10L_TX_HI	0x2000	/* 10BASE-T1L High Level Transmit Ability */
+
+/* BASE-T1 AN LP Base Page ability register [15:0] */
+#define MDIO_AN_T1_LP_L_PAUSE_CAP	LPA_PAUSE_CAP
+#define MDIO_AN_T1_LP_L_PAUSE_ASYM	LPA_PAUSE_ASYM
+#define MDIO_AN_T1_LP_L_FORCE_MS	0x1000	/* LP Force Master/slave Configuration */
+#define MDIO_AN_T1_LP_L_REMOTE_FAULT	LPA_RFAULT
+#define MDIO_AN_T1_LP_L_ACK		LPA_LPACK
+#define MDIO_AN_T1_LP_L_NEXT_PAGE_REQ	LPA_NPAGE
+
+/* BASE-T1 AN LP Base Page ability register [31:16] */
+#define MDIO_AN_T1_LP_M_MST		0x0010	/* LP master preference */
+#define MDIO_AN_T1_LP_M_B10L		0x4000	/* LP is compatible with 10BASE-T1L */
+
+/* BASE-T1 AN LP Base Page ability register [47:32] */
+#define MDIO_AN_T1_LP_H_10L_TX_HI_REQ	0x1000	/* 10BASE-T1L High Level LP Transmit Request */
+#define MDIO_AN_T1_LP_H_10L_TX_HI	0x2000	/* 10BASE-T1L High Level LP Transmit Ability */
+
 /* EEE Supported/Advertisement/LP Advertisement registers.
  *
  * EEE capability Register (3.20), Advertisement (7.60) and
-- 
cgit v1.2.3


From 3da8ffd8545f62fec85a48a3c637b2f427974f11 Mon Sep 17 00:00:00 2001
From: Alexandru Tachici <alexandru.tachici@analog.com>
Date: Fri, 29 Apr 2022 18:34:34 +0300
Subject: net: phy: Add 10BASE-T1L support in phy-c45

This patch is needed because the BASE-T1 uses different registers
for status, control and advertisement to those already
employed in the existing phy-c45 functions.

Where required, genphy_c45 functions will now check whether
the device supports BASE-T1 and use the specific registers
instead: 45.2.7.19 BASE-T1 AN control register,
45.2.7.20 BASE-T1 AN status, 45.2.7.21 BASE-T1 AN
advertisement register, 45.2.7.22 BASE-T1 AN LP Base
Page ability register, 45.2.1.185 BASE-T1 PMA/PMD control
register.

Tested-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mdio.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index fa3515257f54..75b7257a51e1 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -70,6 +70,7 @@
 #define MDIO_B10L_PMA_CTRL	2294	/* 10BASE-T1L PMA control */
 #define MDIO_PMA_10T1L_STAT	2295	/* 10BASE-T1L PMA status */
 #define MDIO_PCS_10T1L_CTRL	2278	/* 10BASE-T1L PCS control */
+#define MDIO_PMA_PMD_BT1	18	/* BASE-T1 PMA/PMD extended ability */
 #define MDIO_AN_T1_CTRL		512	/* BASE-T1 AN control */
 #define MDIO_AN_T1_STAT		513	/* BASE-T1 AN status */
 #define MDIO_AN_T1_ADV_L	514	/* BASE-T1 AN advertisement register [15:0] */
@@ -78,6 +79,7 @@
 #define MDIO_AN_T1_LP_L		517	/* BASE-T1 AN LP Base Page ability register [15:0] */
 #define MDIO_AN_T1_LP_M		518	/* BASE-T1 AN LP Base Page ability register [31:16] */
 #define MDIO_AN_T1_LP_H		519	/* BASE-T1 AN LP Base Page ability register [47:32] */
+#define MDIO_PMA_PMD_BT1_CTRL	2100	/* BASE-T1 PMA/PMD control register */
 
 /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
 #define MDIO_PMA_LASI_RXCTRL	0x9000	/* RX_ALARM control */
@@ -170,6 +172,7 @@
 #define MDIO_PMA_CTRL2_10BT		0x000f	/* 10BASE-T type */
 #define MDIO_PMA_CTRL2_2_5GBT		0x0030  /* 2.5GBaseT type */
 #define MDIO_PMA_CTRL2_5GBT		0x0031  /* 5GBaseT type */
+#define MDIO_PMA_CTRL2_BASET1		0x003D  /* BASE-T1 type */
 #define MDIO_PCS_CTRL2_TYPE		0x0003	/* PCS type selection */
 #define MDIO_PCS_CTRL2_10GBR		0x0000	/* 10GBASE-R type */
 #define MDIO_PCS_CTRL2_10GBX		0x0001	/* 10GBASE-X type */
@@ -223,6 +226,7 @@
 #define MDIO_PMA_EXTABLE_1000BKX	0x0040	/* 1000BASE-KX ability */
 #define MDIO_PMA_EXTABLE_100BTX		0x0080	/* 100BASE-TX ability */
 #define MDIO_PMA_EXTABLE_10BT		0x0100	/* 10BASE-T ability */
+#define MDIO_PMA_EXTABLE_BT1		0x0800	/* BASE-T1 ability */
 #define MDIO_PMA_EXTABLE_NBT		0x4000  /* 2.5/5GBASE-T ability */
 
 /* PHY XGXS lane state register. */
@@ -301,6 +305,9 @@
 #define MDIO_PCS_10T1L_CTRL_LB		0x4000	/* Enable PCS level loopback mode */
 #define MDIO_PCS_10T1L_CTRL_RESET	0x8000	/* PCS reset */
 
+/* BASE-T1 PMA/PMD extended ability register. */
+#define MDIO_PMA_PMD_BT1_B10L_ABLE	0x0004	/* 10BASE-T1L Ability */
+
 /* BASE-T1 auto-negotiation advertisement register [15:0] */
 #define MDIO_AN_T1_ADV_L_PAUSE_CAP	ADVERTISE_PAUSE_CAP
 #define MDIO_AN_T1_ADV_L_PAUSE_ASYM	ADVERTISE_PAUSE_ASYM
@@ -333,6 +340,9 @@
 #define MDIO_AN_T1_LP_H_10L_TX_HI_REQ	0x1000	/* 10BASE-T1L High Level LP Transmit Request */
 #define MDIO_AN_T1_LP_H_10L_TX_HI	0x2000	/* 10BASE-T1L High Level LP Transmit Ability */
 
+/* BASE-T1 PMA/PMD control register */
+#define MDIO_PMA_PMD_BT1_CTRL_CFG_MST	0x4000 /* MASTER-SLAVE config value */
+
 /* EEE Supported/Advertisement/LP Advertisement registers.
  *
  * EEE capability Register (3.20), Advertisement (7.60) and
-- 
cgit v1.2.3


From c2aa2dfef243efe213a480a1ee8566507a5152f4 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Tue, 3 May 2022 01:09:56 -0700
Subject: seccomp: Add wait_killable semantic to seccomp user notifier

This introduces a per-filter flag (SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
that makes it so that when notifications are received by the supervisor the
notifying process will transition to wait killable semantics. Although wait
killable isn't a set of semantics formally exposed to userspace, the
concept is searchable. If the notifying process is signaled prior to the
notification being received by the userspace agent, it will be handled as
normal.

One quirk about how this is handled is that the notifying process
only switches to TASK_KILLABLE if it receives a wakeup from either
an addfd or a signal. This is to avoid an unnecessary wakeup of
the notifying task.

The reasons behind switching into wait_killable only after userspace
receives the notification are:
* Avoiding unncessary work - Often, workloads will perform work that they
  may abort (request racing comes to mind). This allows for syscalls to be
  aborted safely prior to the notification being received by the
  supervisor. In this, the supervisor doesn't end up doing work that the
  workload does not want to complete anyways.
* Avoiding side effects - We don't want the syscall to be interruptible
  once the supervisor starts doing work because it may not be trivial
  to reverse the operation. For example, unmounting a file system may
  take a long time, and it's hard to rollback, or treat that as
  reentrant.
* Avoid breaking runtimes - Various runtimes do not GC when they are
  during a syscall (or while running native code that subsequently
  calls a syscall). If many notifications are blocked, and not picked
  up by the supervisor, this can get the application into a bad state.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220503080958.20220-2-sargun@sargun.me
---
 include/uapi/linux/seccomp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 78074254ab98..0fdc6ef02b94 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -23,6 +23,8 @@
 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW		(1UL << 2)
 #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH		(1UL << 4)
+/* Received notifications wait in killable state (only respond to fatal signals) */
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV	(1UL << 5)
 
 /*
  * All BPF programs must return a 32-bit value.
-- 
cgit v1.2.3


From 41b3c69bf9414375319290c59f198ff5c71d273f Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Mon, 2 May 2022 13:52:36 -0700
Subject: mptcp: expose server_side attribute in MPTCP netlink events

This change records the 'server_side' attribute of MPTCP_EVENT_CREATED
and MPTCP_EVENT_ESTABLISHED events to inform their recipient about the
Client/Server role of the running MPTCP application.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/246
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mptcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 9690efedb5fa..e41ea01a94bb 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -188,6 +188,7 @@ enum mptcp_event_attr {
 	MPTCP_ATTR_IF_IDX,	/* s32 */
 	MPTCP_ATTR_RESET_REASON,/* u32 */
 	MPTCP_ATTR_RESET_FLAGS, /* u32 */
+	MPTCP_ATTR_SERVER_SIDE,	/* u8 */
 
 	__MPTCP_ATTR_AFTER_LAST
 };
-- 
cgit v1.2.3


From 7b33a09d036ffd9a04506122840629c7e870cf08 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:40 +0000
Subject: KVM: arm64: Add support for userspace to suspend a vCPU

Introduce a new MP state, KVM_MP_STATE_SUSPENDED, which indicates a vCPU
is in a suspended state. In the suspended state the vCPU will block
until a wakeup event (pending interrupt) is recognized.

Add a new system event type, KVM_SYSTEM_EVENT_WAKEUP, to indicate to
userspace that KVM has recognized one such wakeup event. It is the
responsibility of userspace to then make the vCPU runnable, or leave it
suspended until the next wakeup event.

Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-7-oupton@google.com
---
 include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6a184d260c7f..7f72fb7b05f2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -444,6 +444,7 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_SHUTDOWN       1
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
+#define KVM_SYSTEM_EVENT_WAKEUP         4
 			__u32 type;
 			__u32 ndata;
 			union {
@@ -646,6 +647,7 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_OPERATING         7
 #define KVM_MP_STATE_LOAD              8
 #define KVM_MP_STATE_AP_RESET_HOLD     9
+#define KVM_MP_STATE_SUSPENDED         10
 
 struct kvm_mp_state {
 	__u32 mp_state;
-- 
cgit v1.2.3


From bfbab44568779e1682bc6f63688bb9c965f0e74a Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:41 +0000
Subject: KVM: arm64: Implement PSCI SYSTEM_SUSPEND

ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND" describes a PSCI call that allows
software to request that a system be placed in the deepest possible
low-power state. Effectively, software can use this to suspend itself to
RAM.

Unfortunately, there really is no good way to implement a system-wide
PSCI call in KVM. Any precondition checks done in the kernel will need
to be repeated by userspace since there is no good way to protect a
critical section that spans an exit to userspace. SYSTEM_RESET and
SYSTEM_OFF are equally plagued by this issue, although no users have
seemingly cared for the relatively long time these calls have been
supported.

The solution is to just make the whole implementation userspace's
problem. Introduce a new system event, KVM_SYSTEM_EVENT_SUSPEND, that
indicates to userspace a calling vCPU has invoked PSCI SYSTEM_SUSPEND.
Additionally, add a CAP to get buy-in from userspace for this new exit
type.

Only advertise the SYSTEM_SUSPEND PSCI call if userspace has opted in.
If a vCPU calls SYSTEM_SUSPEND, punt straight to userspace. Provide
explicit documentation of userspace's responsibilites for the exit and
point to the PSCI specification to describe the actual PSCI call.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-8-oupton@google.com
---
 include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7f72fb7b05f2..32c56384fd08 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -445,6 +445,7 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
 #define KVM_SYSTEM_EVENT_WAKEUP         4
+#define KVM_SYSTEM_EVENT_SUSPEND        5
 			__u32 type;
 			__u32 ndata;
 			union {
@@ -1154,6 +1155,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DISABLE_QUIRKS2 213
 /* #define KVM_CAP_VM_TSC_CONTROL 214 */
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
+#define KVM_CAP_ARM_SYSTEM_SUSPEND 216
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 2068339a6c35147847ba433fd0da67b313779059 Mon Sep 17 00:00:00 2001
From: Dipen Patel <dipenp@nvidia.com>
Date: Fri, 22 Apr 2022 13:52:18 -0700
Subject: gpiolib: cdev: Add hardware timestamp clock type

This patch adds new clock type for the GPIO controller which can
timestamp gpio lines in using hardware means. To expose such
functionalities to the userspace, code has been added where
during line create or set config API calls, it checks for new
clock type and if requested, calls HTE API. During line change
event, the HTE subsystem pushes timestamp data to userspace
through gpiolib-cdev.

Signed-off-by: Dipen Patel <dipenp@nvidia.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/uapi/linux/gpio.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index eaaea3d8e6b4..cb9966d49a16 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -66,6 +66,8 @@ struct gpiochip_info {
  * @GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN: line has pull-down bias enabled
  * @GPIO_V2_LINE_FLAG_BIAS_DISABLED: line has bias disabled
  * @GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME: line events contain REALTIME timestamps
+ * @GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE: line events contain timestamps from
+ * hardware timestamp engine
  */
 enum gpio_v2_line_flag {
 	GPIO_V2_LINE_FLAG_USED			= _BITULL(0),
@@ -80,6 +82,7 @@ enum gpio_v2_line_flag {
 	GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN	= _BITULL(9),
 	GPIO_V2_LINE_FLAG_BIAS_DISABLED		= _BITULL(10),
 	GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME	= _BITULL(11),
+	GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE	= _BITULL(12),
 };
 
 /**
-- 
cgit v1.2.3


From 9ab4807c84a4aacfc9b4f79cc81254035e0ec361 Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:52 -0700
Subject: mptcp: netlink: Add MPTCP_PM_CMD_ANNOUNCE

This change adds a MPTCP netlink interface for issuing
ADD_ADDR advertisements over the chosen MPTCP connection from a
userspace path manager.

The command requires the following parameters:
{ token, { loc_id, family, daddr4 | daddr6 [, dport] } [, if_idx],
flags[signal] }.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index e41ea01a94bb..ac66c1263f02 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -55,6 +55,7 @@ enum {
 	MPTCP_PM_ATTR_ADDR,				/* nested address */
 	MPTCP_PM_ATTR_RCV_ADD_ADDRS,			/* u32 */
 	MPTCP_PM_ATTR_SUBFLOWS,				/* u32 */
+	MPTCP_PM_ATTR_TOKEN,				/* u32 */
 
 	__MPTCP_PM_ATTR_MAX
 };
@@ -93,6 +94,7 @@ enum {
 	MPTCP_PM_CMD_SET_LIMITS,
 	MPTCP_PM_CMD_GET_LIMITS,
 	MPTCP_PM_CMD_SET_FLAGS,
+	MPTCP_PM_CMD_ANNOUNCE,
 
 	__MPTCP_PM_CMD_AFTER_LAST
 };
-- 
cgit v1.2.3


From d9a4594edabf125dc17dfd52acc722c3de1cb44c Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:54 -0700
Subject: mptcp: netlink: Add MPTCP_PM_CMD_REMOVE

This change adds a MPTCP netlink command for issuing a
REMOVE_ADDR signal for an address over the chosen MPTCP
connection from a userspace path manager.

The command requires the following parameters: {token, loc_id}.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index ac66c1263f02..11f9fa001a3c 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -56,6 +56,7 @@ enum {
 	MPTCP_PM_ATTR_RCV_ADD_ADDRS,			/* u32 */
 	MPTCP_PM_ATTR_SUBFLOWS,				/* u32 */
 	MPTCP_PM_ATTR_TOKEN,				/* u32 */
+	MPTCP_PM_ATTR_LOC_ID,				/* u8 */
 
 	__MPTCP_PM_ATTR_MAX
 };
@@ -95,6 +96,7 @@ enum {
 	MPTCP_PM_CMD_GET_LIMITS,
 	MPTCP_PM_CMD_SET_FLAGS,
 	MPTCP_PM_CMD_ANNOUNCE,
+	MPTCP_PM_CMD_REMOVE,
 
 	__MPTCP_PM_CMD_AFTER_LAST
 };
-- 
cgit v1.2.3


From 702c2f646d42cfd9e31133d68a8283fea48fd810 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 3 May 2022 19:38:56 -0700
Subject: mptcp: netlink: allow userspace-driven subflow establishment

This allows userspace to tell kernel to add a new subflow to an existing
mptcp connection.

Userspace provides the token to identify the mptcp-level connection
that needs a change in active subflows and the local and remote
addresses of the new or the to-be-removed subflow.

MPTCP_PM_CMD_SUBFLOW_CREATE requires the following parameters:
{ token, { loc_id, family, loc_addr4 | loc_addr6 }, { family, rem_addr4 |
rem_addr6, rem_port }

MPTCP_PM_CMD_SUBFLOW_DESTROY requires the following parameters:
{ token, { family, loc_addr4 | loc_addr6, loc_port }, { family, rem_addr4 |
rem_addr6, rem_port }

Acked-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 11f9fa001a3c..921963589904 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -57,6 +57,7 @@ enum {
 	MPTCP_PM_ATTR_SUBFLOWS,				/* u32 */
 	MPTCP_PM_ATTR_TOKEN,				/* u32 */
 	MPTCP_PM_ATTR_LOC_ID,				/* u8 */
+	MPTCP_PM_ATTR_ADDR_REMOTE,			/* nested address */
 
 	__MPTCP_PM_ATTR_MAX
 };
@@ -97,6 +98,8 @@ enum {
 	MPTCP_PM_CMD_SET_FLAGS,
 	MPTCP_PM_CMD_ANNOUNCE,
 	MPTCP_PM_CMD_REMOVE,
+	MPTCP_PM_CMD_SUBFLOW_CREATE,
+	MPTCP_PM_CMD_SUBFLOW_DESTROY,
 
 	__MPTCP_PM_CMD_AFTER_LAST
 };
-- 
cgit v1.2.3


From 36f8423597000bd7d5e48b7b306e1d0958e72359 Mon Sep 17 00:00:00 2001
From: Muna Sinada <quic_msinada@quicinc.com>
Date: Wed, 23 Mar 2022 15:46:35 -0700
Subject: cfg80211: support disabling EHT mode

Allow userspace to disable EHT mode during association.

Signed-off-by: Muna Sinada <quic_msinada@quicinc.com>
Signed-off-by: Aloka Dixit <quic_alokad@quicinc.com>
Link: https://lore.kernel.org/r/20220323224636.20211-1-quic_alokad@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0568a79097b8..d9490e3062a7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3175,6 +3175,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_EHT_CAPABILITY,
 
+	NL80211_ATTR_DISABLE_EHT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From c4a67a21a6d255ddcbaa076c0412aad73c7e0c02 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 4 May 2022 08:40:37 -0700
Subject: Revert "Merge branch 'mlxsw-line-card-model'"

This reverts commit 5e927a9f4b9f29d78a7c7d66ea717bb5c8bbad8e, reversing
changes made to cfc1d91a7d78cf9de25b043d81efcc16966d55b3.

The discussion is still ongoing so let's remove the uAPI
until the discussion settles.

Link: https://lore.kernel.org/all/20220425090021.32e9a98f@kernel.org/
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/20220504154037.539442-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/devlink.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index fb8c3864457f..b3d40a5d72ff 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -136,8 +136,6 @@ enum devlink_command {
 	DEVLINK_CMD_LINECARD_NEW,
 	DEVLINK_CMD_LINECARD_DEL,
 
-	DEVLINK_CMD_LINECARD_INFO_GET,		/* can dump */
-
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -577,9 +575,6 @@ enum devlink_attr {
 	DEVLINK_ATTR_LINECARD_STATE,		/* u8 */
 	DEVLINK_ATTR_LINECARD_TYPE,		/* string */
 	DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES,	/* nested */
-	DEVLINK_ATTR_LINECARD_DEVICE_LIST,	/* nested */
-	DEVLINK_ATTR_LINECARD_DEVICE,		/* nested */
-	DEVLINK_ATTR_LINECARD_DEVICE_INDEX,	/* u32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
-- 
cgit v1.2.3


From 0455d4ccec548b0fb51db39a4d3350a7a80a0222 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 26 Apr 2022 12:11:33 -0600
Subject: io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg

If IORING_RECVSEND_POLL_FIRST is set for recv/recvmsg or send/sendmsg,
then we arm poll first rather than attempt a receive or send upfront.
This can be useful if we expect there to be no data (or space) available
for the request, as we can then avoid wasting time on the initial
issue attempt.

Reviewed-by: Hao Xu <howeyxu@tencent.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fad63564678a..06621a278cb6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -213,6 +213,16 @@ enum {
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ *
+ * IORING_RECVSEND_POLL_FIRST	If set, instead of first attempting to send
+ *				or receive and arm poll if that yields an
+ *				-EAGAIN result, arm poll upfront and skip
+ *				the initial transfer attempt.
+ */
+#define IORING_RECVSEND_POLL_FIRST	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
cgit v1.2.3


From 6cc2df8e3a3967e7c13a424f87f6efb1d4a62d80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Fri, 6 May 2022 18:05:07 +0200
Subject: landlock: Add clang-format exceptions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation to a following commit, add clang-format on and
clang-format off stanzas around constant definitions.  This enables to
keep aligned values, which is much more readable than packed
definitions.

Link: https://lore.kernel.org/r/20220506160513.523257-2-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/uapi/linux/landlock.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index b3d952067f59..15c31abb0d76 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -33,7 +33,9 @@ struct landlock_ruleset_attr {
  * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI
  *   version.
  */
+/* clang-format off */
 #define LANDLOCK_CREATE_RULESET_VERSION			(1U << 0)
+/* clang-format on */
 
 /**
  * enum landlock_rule_type - Landlock rule type
@@ -120,6 +122,7 @@ struct landlock_path_beneath_attr {
  *   :manpage:`access(2)`.
  *   Future Landlock evolutions will enable to restrict them.
  */
+/* clang-format off */
 #define LANDLOCK_ACCESS_FS_EXECUTE			(1ULL << 0)
 #define LANDLOCK_ACCESS_FS_WRITE_FILE			(1ULL << 1)
 #define LANDLOCK_ACCESS_FS_READ_FILE			(1ULL << 2)
@@ -133,5 +136,6 @@ struct landlock_path_beneath_attr {
 #define LANDLOCK_ACCESS_FS_MAKE_FIFO			(1ULL << 10)
 #define LANDLOCK_ACCESS_FS_MAKE_BLOCK			(1ULL << 11)
 #define LANDLOCK_ACCESS_FS_MAKE_SYM			(1ULL << 12)
+/* clang-format on */
 
 #endif /* _UAPI_LINUX_LANDLOCK_H */
-- 
cgit v1.2.3


From a36e07dfe6ee71e209383ea9288cd8d1617e14f9 Mon Sep 17 00:00:00 2001
From: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Date: Fri, 6 May 2022 17:24:54 +0000
Subject: rfkill: uapi: fix RFKILL_IOCTL_MAX_SIZE ioctl request definition

The definition of RFKILL_IOCTL_MAX_SIZE introduced by commit
54f586a91532 ("rfkill: make new event layout opt-in") is unusable
since it is based on RFKILL_IOC_EXT_SIZE which has not been defined.
Fix that by replacing the undefined constant with the constant which
is intended to be used in this definition.

Fixes: 54f586a91532 ("rfkill: make new event layout opt-in")
Cc: stable@vger.kernel.org # 5.11+
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Link: https://lore.kernel.org/r/20220506172454.120319-1-glebfm@altlinux.org
[add commit message provided later by Dmitry]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/rfkill.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
index 283c5a7b3f2c..db6c8588c1d0 100644
--- a/include/uapi/linux/rfkill.h
+++ b/include/uapi/linux/rfkill.h
@@ -184,7 +184,7 @@ struct rfkill_event_ext {
 #define RFKILL_IOC_NOINPUT	1
 #define RFKILL_IOCTL_NOINPUT	_IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
 #define RFKILL_IOC_MAX_SIZE	2
-#define RFKILL_IOCTL_MAX_SIZE	_IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32)
+#define RFKILL_IOCTL_MAX_SIZE	_IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32)
 
 /* and that's all userspace gets */
 
-- 
cgit v1.2.3


From ebdeb7c01d025cb059f05dc26b9dc914e46dd43f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 31 Mar 2022 19:27:52 -0600
Subject: io_uring: add support for 128-byte SQEs

Normal SQEs are 64-bytes in length, which is fine for all the commands
we support. However, in preparation for supporting passthrough IO,
provide an option for setting up a ring with 128-byte SQEs.

We continue to use the same type for io_uring_sqe, it's marked and
commented with a zero sized array pad at the end. This provides up
to 80 bytes of data for a passthrough command - 64 bytes for the
extra added data, and 16 bytes available at the end of the existing
SQE.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 31e719f38615..ee84132cadad 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -63,6 +63,12 @@ struct io_uring_sqe {
 	};
 	__u64	addr3;
 	__u64	__pad2[1];
+
+	/*
+	 * If the ring is initialized with IORING_SETUP_SQE128, then this field
+	 * contains 64-bytes of padding, doubling the size of the SQE.
+	 */
+	__u64	__big_sqe_pad[0];
 };
 
 enum {
@@ -119,6 +125,8 @@ enum {
  */
 #define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
 
+#define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
-- 
cgit v1.2.3


From 7a51e5b44b92686eebd3e1b46b86e1eb4db975db Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@fb.com>
Date: Tue, 26 Apr 2022 11:21:23 -0700
Subject: io_uring: support CQE32 in io_uring_cqe

This adds the big_cqe array to the struct io_uring_cqe to support large
CQE's.

Co-developed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Stefan Roesch <shr@fb.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20220426182134.136504-2-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ee84132cadad..ac2d90d669c3 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -126,6 +126,7 @@ enum {
 #define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
 
 #define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32		(1U << 11) /* CQEs are 32 byte */
 
 enum io_uring_op {
 	IORING_OP_NOP,
@@ -245,6 +246,12 @@ struct io_uring_cqe {
 	__u64	user_data;	/* sqe->data submission passed back */
 	__s32	res;		/* result code for this event */
 	__u32	flags;
+
+	/*
+	 * If the ring is initialized with IORING_SETUP_CQE32, then this field
+	 * contains 16-bytes of padding, doubling the size of the CQE.
+	 */
+	__u64 big_cqe[];
 };
 
 /*
-- 
cgit v1.2.3


From bd32889e841c12533d09a1bd02bba932baa9ed8f Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Fri, 29 Apr 2022 23:56:41 +0000
Subject: binder: add BINDER_GET_EXTENDED_ERROR ioctl

Provide a userspace mechanism to pull precise error information upon
failed operations. Extending the current error codes returned by the
interfaces allows userspace to better determine the course of action.
This could be for instance, retrying a failed transaction at a later
point and thus offloading the error handling from the driver.

Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Acked-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Link: https://lore.kernel.org/r/20220429235644.697372-3-cmllamas@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/android/binder.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 11157fae8a8e..e6ee8cae303b 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -236,6 +236,21 @@ struct binder_frozen_status_info {
 	__u32            async_recv;
 };
 
+/* struct binder_extened_error - extended error information
+ * @id:		identifier for the failed operation
+ * @command:	command as defined by binder_driver_return_protocol
+ * @param:	parameter holding a negative errno value
+ *
+ * Used with BINDER_GET_EXTENDED_ERROR. This extends the error information
+ * returned by the driver upon a failed operation. Userspace can pull this
+ * data to properly handle specific error scenarios.
+ */
+struct binder_extended_error {
+	__u32	id;
+	__u32	command;
+	__s32	param;
+};
+
 #define BINDER_WRITE_READ		_IOWR('b', 1, struct binder_write_read)
 #define BINDER_SET_IDLE_TIMEOUT		_IOW('b', 3, __s64)
 #define BINDER_SET_MAX_THREADS		_IOW('b', 5, __u32)
@@ -249,6 +264,7 @@ struct binder_frozen_status_info {
 #define BINDER_FREEZE			_IOW('b', 14, struct binder_freeze_info)
 #define BINDER_GET_FROZEN_INFO		_IOWR('b', 15, struct binder_frozen_status_info)
 #define BINDER_ENABLE_ONEWAY_SPAM_DETECTION	_IOW('b', 16, __u32)
+#define BINDER_GET_EXTENDED_ERROR	_IOWR('b', 17, struct binder_extended_error)
 
 /*
  * NOTE: Two special error codes you should check for when calling
-- 
cgit v1.2.3


From 7ff960a6fe399fdcbca6159063684671ae57eee9 Mon Sep 17 00:00:00 2001
From: Shunsuke Mie <mie@igel.co.jp>
Date: Tue, 10 May 2022 19:27:23 +0900
Subject: virtio: fix virtio transitional ids

This commit fixes the transitional PCI device ID.

Fixes: d61914ea6ada ("virtio: update virtio id table, add transitional ids")
Signed-off-by: Shunsuke Mie <mie@igel.co.jp>
Link: https://lore.kernel.org/r/20220510102723.87666-1-mie@igel.co.jp
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_ids.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 80d76b75bccd..7aa2eb766205 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -73,12 +73,12 @@
  * Virtio Transitional IDs
  */
 
-#define VIRTIO_TRANS_ID_NET		1000 /* transitional virtio net */
-#define VIRTIO_TRANS_ID_BLOCK		1001 /* transitional virtio block */
-#define VIRTIO_TRANS_ID_BALLOON		1002 /* transitional virtio balloon */
-#define VIRTIO_TRANS_ID_CONSOLE		1003 /* transitional virtio console */
-#define VIRTIO_TRANS_ID_SCSI		1004 /* transitional virtio SCSI */
-#define VIRTIO_TRANS_ID_RNG		1005 /* transitional virtio rng */
-#define VIRTIO_TRANS_ID_9P		1009 /* transitional virtio 9p console */
+#define VIRTIO_TRANS_ID_NET		0x1000 /* transitional virtio net */
+#define VIRTIO_TRANS_ID_BLOCK		0x1001 /* transitional virtio block */
+#define VIRTIO_TRANS_ID_BALLOON		0x1002 /* transitional virtio balloon */
+#define VIRTIO_TRANS_ID_CONSOLE		0x1003 /* transitional virtio console */
+#define VIRTIO_TRANS_ID_SCSI		0x1004 /* transitional virtio SCSI */
+#define VIRTIO_TRANS_ID_RNG		0x1005 /* transitional virtio rng */
+#define VIRTIO_TRANS_ID_9P		0x1009 /* transitional virtio 9p console */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
cgit v1.2.3


From c23d47abee3a54e4991ed3993340596d04aabd6a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Apr 2022 08:33:03 +0200
Subject: loop: remove most the top-of-file boilerplate comment from the UAPI
 header

Just leave the SPDX marker and the copyright notice and remove the
irrelevant rest.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220419063303.583106-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/loop.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 98e60801195e..6f63527dd2ed 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
 /*
- * include/linux/loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
+ * Copyright 1993 by Theodore Ts'o.
  */
 #ifndef _UAPI_LINUX_LOOP_H
 #define _UAPI_LINUX_LOOP_H
-- 
cgit v1.2.3


From 26101f5ab6bdf30ac25c8e578e0b4873e7849e0c Mon Sep 17 00:00:00 2001
From: Kaixi Fan <fankaixi.li@bytedance.com>
Date: Sat, 30 Apr 2022 15:48:42 +0800
Subject: bpf: Add source ip in "struct bpf_tunnel_key"

Add tunnel source ip field in "struct bpf_tunnel_key". Add related code
to set and get tunnel source field.

Signed-off-by: Kaixi Fan <fankaixi.li@bytedance.com>
Link: https://lore.kernel.org/r/20220430074844.69214-2-fankaixi.li@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 444fe6f1cf35..95a3d1ff6255 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5604,6 +5604,10 @@ struct bpf_tunnel_key {
 	__u8 tunnel_ttl;
 	__u16 tunnel_ext;	/* Padding, future use. */
 	__u32 tunnel_label;
+	union {
+		__u32 local_ipv4;
+		__u32 local_ipv6[4];
+	};
 };
 
 /* user accessible mirror of in-kernel xfrm_state.
-- 
cgit v1.2.3


From f7e0beaf39d3868dc700d4954b26cf8443c5d423 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Tue, 10 May 2022 13:59:19 -0700
Subject: bpf, x86: Generate trampolines from bpf_tramp_links

Replace struct bpf_tramp_progs with struct bpf_tramp_links to collect
struct bpf_tramp_link(s) for a trampoline.  struct bpf_tramp_link
extends bpf_link to act as a linked list node.

arch_prepare_bpf_trampoline() accepts a struct bpf_tramp_links to
collects all bpf_tramp_link(s) that a trampoline should call.

Change BPF trampoline and bpf_struct_ops to pass bpf_tramp_links
instead of bpf_tramp_progs.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220510205923.3206889-2-kuifeng@fb.com
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 95a3d1ff6255..3d032ea1b6a3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1013,6 +1013,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_XDP = 6,
 	BPF_LINK_TYPE_PERF_EVENT = 7,
 	BPF_LINK_TYPE_KPROBE_MULTI = 8,
+	BPF_LINK_TYPE_STRUCT_OPS = 9,
 
 	MAX_BPF_LINK_TYPE,
 };
-- 
cgit v1.2.3


From 2fcc82411e74e5e6aba336561cf56fb899bfae4e Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Tue, 10 May 2022 13:59:21 -0700
Subject: bpf, x86: Attach a cookie to fentry/fexit/fmod_ret/lsm.

Pass a cookie along with BPF_LINK_CREATE requests.

Add a bpf_cookie field to struct bpf_tracing_link to attach a cookie.
The cookie of a bpf_tracing_link is available by calling
bpf_get_attach_cookie when running the BPF program of the attached
link.

The value of a cookie will be set at bpf_tramp_run_ctx by the
trampoline of the link.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220510205923.3206889-4-kuifeng@fb.com
---
 include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3d032ea1b6a3..bc7f89948f54 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1490,6 +1490,15 @@ union bpf_attr {
 				__aligned_u64	addrs;
 				__aligned_u64	cookies;
 			} kprobe_multi;
+			struct {
+				/* this is overlaid with the target_btf_id above. */
+				__u32		target_btf_id;
+				/* black box user-provided value passed through
+				 * to BPF program at the execution time and
+				 * accessible through bpf_get_attach_cookie() BPF helper
+				 */
+				__u64		cookie;
+			} tracing;
 		};
 	} link_create;
 
-- 
cgit v1.2.3


From a8641d7d8500d41d312350470464e03f3df3672a Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Mon, 9 May 2022 18:50:25 +0530
Subject: HID: amd_sfh: Move bus declaration outside of amd-sfh

This should allow external drivers to reference this bus ID
reservation and detect data coming from amd-sfh.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/uapi/linux/input.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index ee3127461ee0..ef4257ab3026 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -271,6 +271,7 @@ struct input_mask {
 #define BUS_RMI			0x1D
 #define BUS_CEC			0x1E
 #define BUS_INTEL_ISHTP		0x1F
+#define BUS_AMD_SFH		0x20
 
 /*
  * MT_TOOL types
-- 
cgit v1.2.3


From ee692a21e9bf8354bd3ec816f1cf4bff8619ed77 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 11 May 2022 11:17:45 +0530
Subject: fs,io_uring: add infrastructure for uring-cmd

file_operations->uring_cmd is a file private handler.
This is somewhat similar to ioctl but hopefully a lot more sane and
useful as it can be used to enable many io_uring capabilities for the
underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't
know what is in this command type, it's for the provider of ->uring_cmd()
to deal with.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac2d90d669c3..23618be55dd2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -22,6 +22,7 @@ struct io_uring_sqe {
 	union {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
+		__u32	cmd_op;
 	};
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
@@ -61,14 +62,17 @@ struct io_uring_sqe {
 		__s32	splice_fd_in;
 		__u32	file_index;
 	};
-	__u64	addr3;
-	__u64	__pad2[1];
-
-	/*
-	 * If the ring is initialized with IORING_SETUP_SQE128, then this field
-	 * contains 64-bytes of padding, doubling the size of the SQE.
-	 */
-	__u64	__big_sqe_pad[0];
+	union {
+		struct {
+			__u64	addr3;
+			__u64	__pad2[1];
+		};
+		/*
+		 * If the ring is initialized with IORING_SETUP_SQE128, then
+		 * this field is used for 80 bytes of arbitrary command data
+		 */
+		__u8	cmd[0];
+	};
 };
 
 enum {
@@ -175,6 +179,7 @@ enum io_uring_op {
 	IORING_OP_FGETXATTR,
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
+	IORING_OP_URING_CMD,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From 456cba386e94f22fa1b1426303fdcac9e66b1417 Mon Sep 17 00:00:00 2001
From: Kanchan Joshi <joshi.k@samsung.com>
Date: Wed, 11 May 2022 11:17:48 +0530
Subject: nvme: wire-up uring-cmd support for io-passthru on char-device.

Introduce handler for fops->uring_cmd(), implementing async passthru
on char device (/dev/ngX). The handler supports newly introduced
operation NVME_URING_CMD_IO. This operates on a new structure
nvme_uring_cmd, which is similar to struct nvme_passthru_cmd64 but
without the embedded 8b result field. This field is not needed since
uring-cmd allows to return additional result via big-CQE.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-5-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/nvme_ioctl.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index b2e43185e3b5..04e458c649ab 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 {
 	__u64	result;
 };
 
+/* same as struct nvme_passthru_cmd64, minus the 8b result field */
+struct nvme_uring_cmd {
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
+	__u64	addr;
+	__u32	metadata_len;
+	__u32	data_len;
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32   rsvd2;
+};
+
 #define nvme_admin_cmd nvme_passthru_cmd
 
 #define NVME_IOCTL_ID		_IO('N', 0x40)
@@ -83,4 +105,7 @@ struct nvme_passthru_cmd64 {
 #define NVME_IOCTL_IO64_CMD	_IOWR('N', 0x48, struct nvme_passthru_cmd64)
 #define NVME_IOCTL_IO64_CMD_VEC	_IOWR('N', 0x49, struct nvme_passthru_cmd64)
 
+/* io_uring async commands: */
+#define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)
+
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
-- 
cgit v1.2.3


From f569add47119fa910ed7711b26b8d38e21f7ea77 Mon Sep 17 00:00:00 2001
From: Anuj Gupta <anuj20.g@samsung.com>
Date: Wed, 11 May 2022 11:17:49 +0530
Subject: nvme: add vectored-io support for uring-cmd

wire up support for async passthru that takes an array of buffers (using
iovec). Exposed via a new op NVME_URING_CMD_IO_VEC. Same 'struct
nvme_uring_cmd' is to be used with -

1. cmd.addr as base address of user iovec array
2. cmd.data_len as count of iovec array elements

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-6-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/nvme_ioctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index 04e458c649ab..0b1876aa5a59 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -107,5 +107,6 @@ struct nvme_uring_cmd {
 
 /* io_uring async commands: */
 #define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)
+#define NVME_URING_CMD_IO_VEC	_IOWR('N', 0x81, struct nvme_uring_cmd)
 
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
-- 
cgit v1.2.3


From 07343110b293456d30393e89b86c4dee1ac051c8 Mon Sep 17 00:00:00 2001
From: Feng Zhou <zhoufeng.zf@bytedance.com>
Date: Wed, 11 May 2022 17:38:53 +0800
Subject: bpf: add bpf_map_lookup_percpu_elem for percpu map

Add new ebpf helpers bpf_map_lookup_percpu_elem.

The implementation method is relatively simple, refer to the implementation
method of map_lookup_elem of percpu map, increase the parameters of cpu, and
obtain it according to the specified cpu.

Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
Link: https://lore.kernel.org/r/20220511093854.411-2-zhoufeng.zf@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bc7f89948f54..0210f85131b3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5164,6 +5164,14 @@ union bpf_attr {
  *		if not NULL, is a reference which must be released using its
  *		corresponding release function, or moved into a BPF map before
  *		program exit.
+ *
+ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+ * 	Description
+ * 		Perform a lookup in *percpu map* for an entry associated to
+ * 		*key* on *cpu*.
+ * 	Return
+ * 		Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * 		was found or *cpu* is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5361,6 +5369,7 @@ union bpf_attr {
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
 	FN(kptr_xchg),			\
+	FN(map_lookup_percpu_elem),     \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From c9b516f16be5896a3d798f8efb03acbd2ceec715 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 12 May 2022 20:38:36 -0700
Subject: ELF, uapi: fixup ELF_ST_TYPE definition

This is very theoretical compile failure:

	ELF_ST_TYPE(st_info = A)

Cast will bind first and st_info will stop being lvalue:

	error: lvalue required as left operand of assignment

Given that the only use of this macro is

	ELF_ST_TYPE(sym->st_info)

where st_info is "unsigned char" I've decided to remove cast especially
given that companion macro ELF_ST_BIND doesn't use cast.

Link: https://lkml.kernel.org/r/Ymv7G1BeX4kt3obz@localhost.localdomain
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/elf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 787c657bfae8..237f21a5e0f6 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -134,7 +134,7 @@ typedef __s64	Elf64_Sxword;
 #define STT_TLS     6
 
 #define ELF_ST_BIND(x)		((x) >> 4)
-#define ELF_ST_TYPE(x)		(((unsigned int) x) & 0xf)
+#define ELF_ST_TYPE(x)		((x) & 0xf)
 #define ELF32_ST_BIND(x)	ELF_ST_BIND(x)
 #define ELF32_ST_TYPE(x)	ELF_ST_TYPE(x)
 #define ELF64_ST_BIND(x)	ELF_ST_BIND(x)
-- 
cgit v1.2.3


From 783eb354fb3dcd598e8e7e8a2ed88c0fb6ce5d2f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 4 Apr 2022 15:19:41 +0900
Subject: agpgart.h: do not include <stdlib.h> from exported header

Commit 35d0f1d54ecd ("include/uapi/linux/agpgart.h: include stdlib.h in
userspace") included <stdlib.h> to fix the unknown size_t error, but
I do not think it is the right fix.

This header already uses __kernel_size_t a few lines below.

Replace the remaining size_t, and stop including <stdlib.h>.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---
 include/uapi/linux/agpgart.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/agpgart.h b/include/uapi/linux/agpgart.h
index f5251045181a..9cc3448c0b5b 100644
--- a/include/uapi/linux/agpgart.h
+++ b/include/uapi/linux/agpgart.h
@@ -52,7 +52,6 @@
 
 #ifndef __KERNEL__
 #include <linux/types.h>
-#include <stdlib.h>
 
 struct agp_version {
 	__u16 major;
@@ -64,10 +63,10 @@ typedef struct _agp_info {
 	__u32 bridge_id;	/* bridge vendor/device         */
 	__u32 agp_mode;		/* mode info of bridge          */
 	unsigned long aper_base;/* base of aperture             */
-	size_t aper_size;	/* size of aperture             */
-	size_t pg_total;	/* max pages (swap + system)    */
-	size_t pg_system;	/* max pages (system)           */
-	size_t pg_used;		/* current pages used           */
+	__kernel_size_t aper_size;	/* size of aperture             */
+	__kernel_size_t pg_total;	/* max pages (swap + system)    */
+	__kernel_size_t pg_system;	/* max pages (system)           */
+	__kernel_size_t pg_used;	/* current pages used           */
 } agp_info;
 
 typedef struct _agp_setup {
-- 
cgit v1.2.3


From 1339f24b336db5ded9811f3fe7b948e0de207785 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 7 May 2022 14:18:44 -0600
Subject: io_uring: allow allocated fixed files for openat/openat2

If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
then that's a hint to allocate a fixed file descriptor rather than have
one be passed in directly.

This can be useful for having io_uring manage the direct descriptor space.

Normal open direct requests will complete with 0 for success, and < 0
in case of error. If io_uring is asked to allocated the direct descriptor,
then the direct descriptor is returned in case of success.

Reviewed-by: Hao Xu <howeyxu@tencent.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 06621a278cb6..b7f02a55032a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -63,6 +63,15 @@ struct io_uring_sqe {
 	__u64	__pad2[2];
 };
 
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC		(~0U)
+
 enum {
 	IOSQE_FIXED_FILE_BIT,
 	IOSQE_IO_DRAIN_BIT,
-- 
cgit v1.2.3


From a8da73a32b6e9271a613e5a0e90a8c35f40abeb8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 9 May 2022 09:29:14 -0600
Subject: io_uring: add flag for allocating a fully sparse direct descriptor
 space

Currently to setup a fully sparse descriptor space upfront, the app needs
to alloate an array of the full size and memset it to -1 and then pass
that in. Make this a bit easier by allowing a flag that simply does
this internally rather than needing to copy each slot separately.

This works with IORING_REGISTER_FILES2 as the flag is set in struct
io_uring_rsrc_register, and is only allow when the type is
IORING_RSRC_FILE as this doesn't make sense for registered buffers.

Reviewed-by: Hao Xu <howeyxu@tencent.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b7f02a55032a..36ec43dc7bf9 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -396,9 +396,15 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+/*
+ * Register a fully sparse file space, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE	(1U << 0)
+
 struct io_uring_rsrc_register {
 	__u32 nr;
-	__u32 resv;
+	__u32 flags;
 	__u64 resv2;
 	__aligned_u64 data;
 	__aligned_u64 tags;
-- 
cgit v1.2.3


From b1f9e876862d8f7176299ec4fb2108bc1045cbc8 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 12 May 2022 20:22:56 -0700
Subject: mm/uffd: enable write protection for shmem & hugetlbfs

We've had all the necessary changes ready for both shmem and hugetlbfs.
Turn on all the shmem/hugetlbfs switches for userfaultfd-wp.

We can expand UFFD_API_RANGE_IOCTLS_BASIC with _UFFDIO_WRITEPROTECT too
because all existing types now support write protection mode.

Since vma_can_userfault() will be used elsewhere, move into userfaultfd_k.h.

Link: https://lkml.kernel.org/r/20220405014926.15101-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/userfaultfd.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index ef739054cb1c..7d32b1e797fb 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -33,7 +33,8 @@
 			   UFFD_FEATURE_THREAD_ID |		\
 			   UFFD_FEATURE_MINOR_HUGETLBFS |	\
 			   UFFD_FEATURE_MINOR_SHMEM |		\
-			   UFFD_FEATURE_EXACT_ADDRESS)
+			   UFFD_FEATURE_EXACT_ADDRESS |		\
+			   UFFD_FEATURE_WP_HUGETLBFS_SHMEM)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -47,7 +48,8 @@
 #define UFFD_API_RANGE_IOCTLS_BASIC		\
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
-	 (__u64)1 << _UFFDIO_CONTINUE)
+	 (__u64)1 << _UFFDIO_CONTINUE |		\
+	 (__u64)1 << _UFFDIO_WRITEPROTECT)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to
@@ -194,6 +196,9 @@ struct uffdio_api {
 	 * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
 	 * faults would be provided and the offset within the page would not be
 	 * masked.
+	 *
+	 * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd
+	 * write-protection mode is supported on both shmem and hugetlbfs.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -207,6 +212,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_MINOR_HUGETLBFS		(1<<9)
 #define UFFD_FEATURE_MINOR_SHMEM		(1<<10)
 #define UFFD_FEATURE_EXACT_ADDRESS		(1<<11)
+#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM		(1<<12)
 	__u64 features;
 
 	__u64 ioctls;
-- 
cgit v1.2.3


From 390ed29b5e425ba00da2b6113b74a14949f71b02 Mon Sep 17 00:00:00 2001
From: Hao Xu <howeyxu@tencent.com>
Date: Sat, 14 May 2022 22:20:43 +0800
Subject: io_uring: add IORING_ACCEPT_MULTISHOT for accept

add an accept_flag IORING_ACCEPT_MULTISHOT for accept, which is to
support multishot.

Signed-off-by: Hao Xu <howeyxu@tencent.com>
Link: https://lore.kernel.org/r/20220514142046.58072-2-haoxu.linux@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 36ec43dc7bf9..15f821af9242 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -232,6 +232,11 @@ enum {
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
 
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
cgit v1.2.3


From 8fa10ee183c3a1ecb53e81c95895ed5bc2a5530a Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 15 May 2022 21:58:31 +0100
Subject: cdrom: mark CDROMGETSPINDOWN/CDROMSETSPINDOWN obsolete

These were only implemented by the IDE CD driver, which has since
been removed.  Given that nobody is likely to create new CD/DVD
hardware (and associated drivers) we can mark these appropriately.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Phillip Potter <phil@philpotter.co.uk>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Link: https://lore.kernel.org/all/20220427132436.12795-3-paul.gortmaker@windriver.com
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20220515205833.944139-4-phil@philpotter.co.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/cdrom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 804ff8d98f71..011e594e4a0d 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -103,7 +103,7 @@
 #define CDROMREADALL		0x5318	/* read all 2646 bytes */
 
 /* 
- * These ioctls are (now) only in ide-cd.c for controlling 
+ * These ioctls were only in (now removed) ide-cd.c for controlling
  * drive spindown time.  They should be implemented in the
  * Uniform driver, via generic packet commands, GPCMD_MODE_SELECT_10,
  * GPCMD_MODE_SENSE_10 and the GPMODE_POWER_PAGE...
-- 
cgit v1.2.3


From 89527be8d8d672773eeaec910118a6e84fb597e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 13 May 2022 11:33:56 -0700
Subject: net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes

New netlink attributes IFLA_TSO_MAX_SIZE and IFLA_TSO_MAX_SEGS
are used to report to user-space the device TSO limits.

ip -d link sh dev eth1
...
   tso_max_size 65536 tso_max_segs 65535

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d1e600816b82..5f58dcfe2787 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -368,6 +368,8 @@ enum {
 	IFLA_PARENT_DEV_NAME,
 	IFLA_PARENT_DEV_BUS_NAME,
 	IFLA_GRO_MAX_SIZE,
+	IFLA_TSO_MAX_SIZE,
+	IFLA_TSO_MAX_SEGS,
 
 	__IFLA_MAX
 };
-- 
cgit v1.2.3


From f04fbcc64e4be16185151f9fca44ea1b3d074bd0 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 20 Apr 2022 16:08:27 +0800
Subject: btrfs: move definition of btrfs_raid_types to volumes.h

It's only internally used as another way to represent btrfs profiles,
it's not exposed through any on-disk format, in fact this
btrfs_raid_types is diverted from the on-disk format values.

Furthermore, since it's internal structure, its definition can change in
the future.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs_tree.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index b069752a8ecf..d4117152d907 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -880,19 +880,6 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
 					 BTRFS_SPACE_INFO_GLOBAL_RSV)
 
-enum btrfs_raid_types {
-	BTRFS_RAID_RAID10,
-	BTRFS_RAID_RAID1,
-	BTRFS_RAID_DUP,
-	BTRFS_RAID_RAID0,
-	BTRFS_RAID_SINGLE,
-	BTRFS_RAID_RAID5,
-	BTRFS_RAID_RAID6,
-	BTRFS_RAID_RAID1C3,
-	BTRFS_RAID_RAID1C4,
-	BTRFS_NR_RAID_TYPES
-};
-
 #define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
 					 BTRFS_BLOCK_GROUP_SYSTEM |  \
 					 BTRFS_BLOCK_GROUP_METADATA)
-- 
cgit v1.2.3


From 1c05bb947f6464756174830b778aabf8f9d6ed0e Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 16 May 2022 12:12:02 +0200
Subject: include/uapi/linux/vfio.h: Fix trivial typo - _IORW should be _IOWR
 instead

There is no macro called _IORW, so use _IOWR in the comment instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Link: https://lore.kernel.org/r/20220516101202.88373-1-thuth@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/uapi/linux/vfio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index fea86061b44e..733a1cddde30 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -643,7 +643,7 @@ enum {
 };
 
 /**
- * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
  *					      struct vfio_pci_hot_reset_info)
  *
  * Return: 0 on success, -errno on failure:
@@ -770,7 +770,7 @@ struct vfio_device_ioeventfd {
 #define VFIO_DEVICE_IOEVENTFD		_IO(VFIO_TYPE, VFIO_BASE + 16)
 
 /**
- * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17,
+ * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
  *			       struct vfio_device_feature)
  *
  * Get, set, or probe feature data of the device.  The feature is selected
-- 
cgit v1.2.3


From 9f39d36530e5678d092d53c5c2c60d82b4dcc169 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Sat, 7 May 2022 13:55:58 +0200
Subject: can: isotp: add support for transmission without flow control

Usually the ISO 15765-2 protocol is a point-to-point protocol to transfer
segmented PDUs to a dedicated receiver. This receiver sends a flow control
message to specify protocol options and timings (e.g. block size / STmin).

The so called functional addressing communication allows a 1:N
communication but is limited to a single frame length.

This new CAN_ISOTP_CF_BROADCAST allows an unconfirmed 1:N communication
with PDU length that would not fit into a single frame. This feature is
not covered by the ISO 15765-2 standard.

Link: https://lore.kernel.org/all/20220507115558.19065-1-socketcan@hartkopp.net
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index 590f8aea2b6d..439c982f7e81 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -124,18 +124,19 @@ struct can_isotp_ll_options {
 
 /* flags for isotp behaviour */
 
-#define CAN_ISOTP_LISTEN_MODE	0x001	/* listen only (do not send FC) */
-#define CAN_ISOTP_EXTEND_ADDR	0x002	/* enable extended addressing */
-#define CAN_ISOTP_TX_PADDING	0x004	/* enable CAN frame padding tx path */
-#define CAN_ISOTP_RX_PADDING	0x008	/* enable CAN frame padding rx path */
-#define CAN_ISOTP_CHK_PAD_LEN	0x010	/* check received CAN frame padding */
-#define CAN_ISOTP_CHK_PAD_DATA	0x020	/* check received CAN frame padding */
-#define CAN_ISOTP_HALF_DUPLEX	0x040	/* half duplex error state handling */
-#define CAN_ISOTP_FORCE_TXSTMIN	0x080	/* ignore stmin from received FC */
-#define CAN_ISOTP_FORCE_RXSTMIN	0x100	/* ignore CFs depending on rx stmin */
-#define CAN_ISOTP_RX_EXT_ADDR	0x200	/* different rx extended addressing */
-#define CAN_ISOTP_WAIT_TX_DONE	0x400	/* wait for tx completion */
-#define CAN_ISOTP_SF_BROADCAST	0x800	/* 1-to-N functional addressing */
+#define CAN_ISOTP_LISTEN_MODE	0x0001	/* listen only (do not send FC) */
+#define CAN_ISOTP_EXTEND_ADDR	0x0002	/* enable extended addressing */
+#define CAN_ISOTP_TX_PADDING	0x0004	/* enable CAN frame padding tx path */
+#define CAN_ISOTP_RX_PADDING	0x0008	/* enable CAN frame padding rx path */
+#define CAN_ISOTP_CHK_PAD_LEN	0x0010	/* check received CAN frame padding */
+#define CAN_ISOTP_CHK_PAD_DATA	0x0020	/* check received CAN frame padding */
+#define CAN_ISOTP_HALF_DUPLEX	0x0040	/* half duplex error state handling */
+#define CAN_ISOTP_FORCE_TXSTMIN	0x0080	/* ignore stmin from received FC */
+#define CAN_ISOTP_FORCE_RXSTMIN	0x0100	/* ignore CFs depending on rx stmin */
+#define CAN_ISOTP_RX_EXT_ADDR	0x0200	/* different rx extended addressing */
+#define CAN_ISOTP_WAIT_TX_DONE	0x0400	/* wait for tx completion */
+#define CAN_ISOTP_SF_BROADCAST	0x0800	/* 1-to-N functional addressing */
+#define CAN_ISOTP_CF_BROADCAST	0x1000	/* 1-to-N transmission w/o FC */
 
 /* protocol machine default values */
 
-- 
cgit v1.2.3


From b87f5e25b2f9deb503a61c6957c7b1680d91cfea Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Fri, 6 May 2022 01:03:48 +0200
Subject: media: uapi: Add IPU3 packed Y10 format

Some platforms with an Intel IPU3 have an IR sensor producing 10 bit
greyscale format data that is transmitted over a CSI-2 bus to a CIO2
device - this packs the data into 32 bytes per 25 pixels. Add an entry
to the uAPI header defining that format.

Signed-off-by: Daniel Scally <djrscally@gmail.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 include/uapi/linux/videodev2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 6d465dc443b7..343b95107fce 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -569,6 +569,7 @@ struct v4l2_pix_format {
 /* Grey bit-packed formats */
 #define V4L2_PIX_FMT_Y10BPACK    v4l2_fourcc('Y', '1', '0', 'B') /* 10  Greyscale bit-packed */
 #define V4L2_PIX_FMT_Y10P    v4l2_fourcc('Y', '1', '0', 'P') /* 10  Greyscale, MIPI RAW10 packed */
+#define V4L2_PIX_FMT_IPU3_Y10		v4l2_fourcc('i', 'p', '3', 'y') /* IPU3 packed 10-bit greyscale */
 
 /* Palette formats */
 #define V4L2_PIX_FMT_PAL8    v4l2_fourcc('P', 'A', 'L', '8') /*  8  8-bit palette */
@@ -749,7 +750,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_QC08C    v4l2_fourcc('Q', '0', '8', 'C') /* Qualcomm 8-bit compressed */
 #define V4L2_PIX_FMT_QC10C    v4l2_fourcc('Q', '1', '0', 'C') /* Qualcomm 10-bit compressed */
 
-/* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */
+/* 10bit raw packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */
 #define V4L2_PIX_FMT_IPU3_SBGGR10	v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */
 #define V4L2_PIX_FMT_IPU3_SGBRG10	v4l2_fourcc('i', 'p', '3', 'g') /* IPU3 packed 10-bit GBRG bayer */
 #define V4L2_PIX_FMT_IPU3_SGRBG10	v4l2_fourcc('i', 'p', '3', 'G') /* IPU3 packed 10-bit GRBG bayer */
-- 
cgit v1.2.3


From 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= <jerome.pouiller@silabs.com>
Date: Tue, 17 May 2022 09:27:08 +0200
Subject: dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The typedefs u32 and u64 are not available in userspace. Thus user get
an error he try to use DMA_BUF_SET_NAME_A or DMA_BUF_SET_NAME_B:

    $ gcc -Wall   -c -MMD -c -o ioctls_list.o ioctls_list.c
    In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1,
                     from /usr/include/linux/ioctl.h:5,
                     from /usr/include/asm-generic/ioctls.h:5,
                     from ioctls_list.c:11:
    ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function)
      463 |     { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h
          |                             ^~~~~~~~~~~~~~~~~~
    ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function)
      464 |     { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h
          |                             ^~~~~~~~~~~~~~~~~~

The issue was initially reported here[1].

[1]: https://github.com/jerome-pouiller/ioctl/pull/14

Signed-off-by: Jérôme Pouiller <jerome.pouiller@silabs.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi")
CC: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/uapi/linux/dma-buf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index 8e4a2ca0bcbf..b1523cb8ab30 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -92,7 +92,7 @@ struct dma_buf_sync {
  * between them in actual uapi, they're just different numbers.
  */
 #define DMA_BUF_SET_NAME	_IOW(DMA_BUF_BASE, 1, const char *)
-#define DMA_BUF_SET_NAME_A	_IOW(DMA_BUF_BASE, 1, u32)
-#define DMA_BUF_SET_NAME_B	_IOW(DMA_BUF_BASE, 1, u64)
+#define DMA_BUF_SET_NAME_A	_IOW(DMA_BUF_BASE, 1, __u32)
+#define DMA_BUF_SET_NAME_B	_IOW(DMA_BUF_BASE, 1, __u64)
 
 #endif
-- 
cgit v1.2.3


From c8383054506c77b814489c09877b5db83fd4abf2 Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Mon, 25 Apr 2022 20:21:24 +0800
Subject: cachefiles: notify the user daemon when looking up cookie

Fscache/CacheFiles used to serve as a local cache for a remote
networking fs. A new on-demand read mode will be introduced for
CacheFiles, which can boost the scenario where on-demand read semantics
are needed, e.g. container image distribution.

The essential difference between these two modes is seen when a cache
miss occurs: In the original mode, the netfs will fetch the data from
the remote server and then write it to the cache file; in on-demand
read mode, fetching the data and writing it into the cache is delegated
to a user daemon.

As the first step, notify the user daemon when looking up cookie. In
this case, an anonymous fd is sent to the user daemon, through which the
user daemon can write the fetched data to the cache file. Since the user
daemon may move the anonymous fd around, e.g. through dup(), an object
ID uniquely identifying the cache file is also attached.

Also add one advisory flag (FSCACHE_ADV_WANT_CACHE_SIZE) suggesting that
the cache file size shall be retrieved at runtime. This helps the
scenario where one cache file contains multiple netfs files, e.g. for
the purpose of deduplication. In this case, netfs itself has no idea the
size of the cache file, whilst the user daemon should give the hint on
it.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220509074028.74954-3-jefflexu@linux.alibaba.com
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/uapi/linux/cachefiles.h | 50 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 include/uapi/linux/cachefiles.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h
new file mode 100644
index 000000000000..521f2fe4fe9c
--- /dev/null
+++ b/include/uapi/linux/cachefiles.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_CACHEFILES_H
+#define _LINUX_CACHEFILES_H
+
+#include <linux/types.h>
+
+/*
+ * Fscache ensures that the maximum length of cookie key is 255. The volume key
+ * is controlled by netfs, and generally no bigger than 255.
+ */
+#define CACHEFILES_MSG_MAX_SIZE	1024
+
+enum cachefiles_opcode {
+	CACHEFILES_OP_OPEN,
+};
+
+/*
+ * Message Header
+ *
+ * @msg_id	a unique ID identifying this message
+ * @opcode	message type, CACHEFILE_OP_*
+ * @len		message length, including message header and following data
+ * @object_id	a unique ID identifying a cache file
+ * @data	message type specific payload
+ */
+struct cachefiles_msg {
+	__u32 msg_id;
+	__u32 opcode;
+	__u32 len;
+	__u32 object_id;
+	__u8  data[];
+};
+
+/*
+ * @data contains the volume_key followed directly by the cookie_key. volume_key
+ * is a NUL-terminated string; @volume_key_size indicates the size of the volume
+ * key in bytes. cookie_key is binary data, which is netfs specific;
+ * @cookie_key_size indicates the size of the cookie key in bytes.
+ *
+ * @fd identifies an anon_fd referring to the cache file.
+ */
+struct cachefiles_open {
+	__u32 volume_key_size;
+	__u32 cookie_key_size;
+	__u32 fd;
+	__u32 flags;
+	__u8  data[];
+};
+
+#endif
-- 
cgit v1.2.3


From 324b954ac80cff0d11ddb6bde9b6631e45e98620 Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Mon, 25 Apr 2022 20:21:26 +0800
Subject: cachefiles: notify the user daemon when withdrawing cookie

Notify the user daemon that cookie is going to be withdrawn, providing a
hint that the associated anonymous fd can be closed.

Be noted that this is only a hint. The user daemon may close the
associated anonymous fd when receiving the CLOSE request, then it will
receive another anonymous fd when the cookie gets looked up. Or it may
ignore the CLOSE request, and keep writing data through the anonymous
fd. However the next time the cookie gets looked up, the user daemon
will still receive another new anonymous fd.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Acked-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20220425122143.56815-5-jefflexu@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/uapi/linux/cachefiles.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h
index 521f2fe4fe9c..37a0071037c8 100644
--- a/include/uapi/linux/cachefiles.h
+++ b/include/uapi/linux/cachefiles.h
@@ -12,6 +12,7 @@
 
 enum cachefiles_opcode {
 	CACHEFILES_OP_OPEN,
+	CACHEFILES_OP_CLOSE,
 };
 
 /*
-- 
cgit v1.2.3


From 9032b6e8589f269743984aac53e82e4835be16dc Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Mon, 25 Apr 2022 20:21:27 +0800
Subject: cachefiles: implement on-demand read

Implement the data plane of on-demand read mode.

The early implementation [1] place the entry to
cachefiles_ondemand_read() in fscache_read(). However, fscache_read()
can only detect if the requested file range is fully cache miss, whilst
we need to notify the user daemon as long as there's a hole inside the
requested file range.

Thus the entry is now placed in cachefiles_prepare_read(). When working
in on-demand read mode, once a hole detected, the read routine will send
a READ request to the user daemon. The user daemon needs to fetch the
data and write it to the cache file. After sending the READ request, the
read routine will hang there, until the READ request is handled by the
user daemon. Then it will retry to read from the same file range. If no
progress encountered, the read routine will fail then.

A new NETFS_SREQ_ONDEMAND flag is introduced to indicate that on-demand
read should be done when a cache miss encountered.

[1] https://lore.kernel.org/all/20220406075612.60298-6-jefflexu@linux.alibaba.com/ #v8

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Acked-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20220425122143.56815-6-jefflexu@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/uapi/linux/cachefiles.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h
index 37a0071037c8..78caa73e5343 100644
--- a/include/uapi/linux/cachefiles.h
+++ b/include/uapi/linux/cachefiles.h
@@ -3,6 +3,7 @@
 #define _LINUX_CACHEFILES_H
 
 #include <linux/types.h>
+#include <linux/ioctl.h>
 
 /*
  * Fscache ensures that the maximum length of cookie key is 255. The volume key
@@ -13,6 +14,7 @@
 enum cachefiles_opcode {
 	CACHEFILES_OP_OPEN,
 	CACHEFILES_OP_CLOSE,
+	CACHEFILES_OP_READ,
 };
 
 /*
@@ -48,4 +50,19 @@ struct cachefiles_open {
 	__u8  data[];
 };
 
+/*
+ * @off		indicates the starting offset of the requested file range
+ * @len		indicates the length of the requested file range
+ */
+struct cachefiles_read {
+	__u64 off;
+	__u64 len;
+};
+
+/*
+ * Reply for READ request
+ * @arg for this ioctl is the @id field of READ request.
+ */
+#define CACHEFILES_IOC_READ_COMPLETE	_IOW(0x98, 1, int)
+
 #endif
-- 
cgit v1.2.3


From c7fb19428d67dd0a2a78a4f237af01d39c78dc5a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 30 Apr 2022 14:38:53 -0600
Subject: io_uring: add support for ring mapped supplied buffers

Provided buffers allow an application to supply io_uring with buffers
that can then be grabbed for a read/receive request, when the data
source is ready to deliver data. The existing scheme relies on using
IORING_OP_PROVIDE_BUFFERS to do that, but it can be difficult to use
in real world applications. It's pretty efficient if the application
is able to supply back batches of provided buffers when they have been
consumed and the application is ready to recycle them, but if
fragmentation occurs in the buffer space, it can become difficult to
supply enough buffers at the time. This hurts efficiency.

Add a register op, IORING_REGISTER_PBUF_RING, which allows an application
to setup a shared queue for each buffer group of provided buffers. The
application can then supply buffers simply by adding them to this ring,
and the kernel can consume then just as easily. The ring shares the head
with the application, the tail remains private in the kernel.

Provided buffers setup with IORING_REGISTER_PBUF_RING cannot use
IORING_OP_{PROVIDE,REMOVE}_BUFFERS for adding or removing entries to the
ring, they must use the mapped ring. Mapped provided buffer rings can
co-exist with normal provided buffers, just not within the same group ID.

To gauge overhead of the existing scheme and evaluate the mapped ring
approach, a simple NOP benchmark was written. It uses a ring of 128
entries, and submits/completes 32 at the time. 'Replenish' is how
many buffers are provided back at the time after they have been
consumed:

Test			Replenish			NOPs/sec
================================================================
No provided buffers	NA				~30M
Provided buffers	32				~16M
Provided buffers	 1				~10M
Ring buffers		32				~27M
Ring buffers		 1				~27M

The ring mapped buffers perform almost as well as not using provided
buffers at all, and they don't care if you provided 1 or more back at
the same time. This means application can just replenish as they go,
rather than need to batch and compact, further reducing overhead in the
application. The NOP benchmark above doesn't need to do any compaction,
so that overhead isn't even reflected in the above test.

Co-developed-by: Dylan Yudaken <dylany@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 15f821af9242..ddf969ae5a79 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -384,6 +384,10 @@ enum {
 	IORING_REGISTER_RING_FDS		= 20,
 	IORING_UNREGISTER_RING_FDS		= 21,
 
+	/* register ring based provide buffer group */
+	IORING_REGISTER_PBUF_RING		= 22,
+	IORING_UNREGISTER_PBUF_RING		= 23,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -461,6 +465,38 @@ struct io_uring_restriction {
 	__u32 resv2[3];
 };
 
+struct io_uring_buf {
+	__u64	addr;
+	__u32	len;
+	__u16	bid;
+	__u16	resv;
+};
+
+struct io_uring_buf_ring {
+	union {
+		/*
+		 * To avoid spilling into more pages than we need to, the
+		 * ring tail is overlaid with the io_uring_buf->resv field.
+		 */
+		struct {
+			__u64	resv1;
+			__u32	resv2;
+			__u16	resv3;
+			__u16	tail;
+		};
+		struct io_uring_buf	bufs[0];
+	};
+};
+
+/* argument for IORING_(UN)REGISTER_PBUF_RING */
+struct io_uring_buf_reg {
+	__u64	ring_addr;
+	__u32	ring_entries;
+	__u16	bgid;
+	__u16	pad;
+	__u64	resv[3];
+};
+
 /*
  * io_uring_restriction->opcode values
  */
-- 
cgit v1.2.3


From c1318b39c7d36bd5139a9c71044ff2b2d3c6f9d8 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@nvidia.com>
Date: Wed, 18 May 2022 12:27:31 +0300
Subject: tls: Add opt-in zerocopy mode of sendfile()

TLS device offload copies sendfile data to a bounce buffer before
transmitting. It allows to maintain the valid MAC on TLS records when
the file contents change and a part of TLS record has to be
retransmitted on TCP level.

In many common use cases (like serving static files over HTTPS) the file
contents are not changed on the fly. In many use cases breaking the
connection is totally acceptable if the file is changed during
transmission, because it would be received corrupted in any case.

This commit allows to optimize performance for such use cases to
providing a new optional mode of TLS sendfile(), in which the extra copy
is skipped. Removing this copy improves performance significantly, as
TLS and TCP sendfile perform the same operations, and the only overhead
is TLS header/trailer insertion.

The new mode can only be enabled with the new socket option named
TLS_TX_ZEROCOPY_SENDFILE on per-socket basis. It preserves backwards
compatibility with existing applications that rely on the copying
behavior.

The new mode is safe, meaning that unsolicited modifications of the file
being sent can't break integrity of the kernel. The worst thing that can
happen is sending a corrupted TLS record, which is in any case not
forbidden when using regular TCP sockets.

Sockets other than TLS device offload are not affected by the new socket
option. The actual status of zerocopy sendfile can be queried with
sock_diag.

Performance numbers in a single-core test with 24 HTTPS streams on
nginx, under 100% CPU load:

* non-zerocopy: 33.6 Gbit/s
* zerocopy: 79.92 Gbit/s

CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz

Signed-off-by: Boris Pismenny <borisp@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20220518092731.1243494-1-maximmi@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/tls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 5f38be0ec0f3..ac39328eabe7 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -39,6 +39,7 @@
 /* TLS socket options */
 #define TLS_TX			1	/* Set transmit parameters */
 #define TLS_RX			2	/* Set receive parameters */
+#define TLS_TX_ZEROCOPY_SENDFILE	3	/* transmit zerocopy sendfile */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
@@ -160,6 +161,7 @@ enum {
 	TLS_INFO_CIPHER,
 	TLS_INFO_TXCONF,
 	TLS_INFO_RXCONF,
+	TLS_INFO_ZC_SENDFILE,
 	__TLS_INFO_MAX,
 };
 #define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
-- 
cgit v1.2.3


From 58e5bdeb9c2b06895e723c0b1e670f54510ff782 Mon Sep 17 00:00:00 2001
From: Kanchan Joshi <joshi.k@samsung.com>
Date: Fri, 20 May 2022 14:36:30 +0530
Subject: nvme: enable uring-passthrough for admin commands

Add two new opcodes that userspace can use for admin commands:
NVME_URING_CMD_ADMIN : non-vectroed
NVME_URING_CMD_ADMIN_VEC : vectored variant

Wire up support when these are issued on controller node(/dev/nvmeX).

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220520090630.70394-3-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/nvme_ioctl.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index 0b1876aa5a59..2f76cba67166 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -108,5 +108,7 @@ struct nvme_uring_cmd {
 /* io_uring async commands: */
 #define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)
 #define NVME_URING_CMD_IO_VEC	_IOWR('N', 0x81, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN	_IOWR('N', 0x82, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd)
 
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
-- 
cgit v1.2.3


From 3bc253c2e652cf5f12cd8c00d80d8ec55d67d1a7 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 19 May 2022 16:30:10 -0700
Subject: bpf: Add bpf_skc_to_mptcp_sock_proto

This patch implements a new struct bpf_func_proto, named
bpf_skc_to_mptcp_sock_proto. Define a new bpf_id BTF_SOCK_TYPE_MPTCP,
and a new helper bpf_skc_to_mptcp_sock(), which invokes another new
helper bpf_mptcp_sock_from_subflow() in net/mptcp/bpf.c to get struct
mptcp_sock from a given subflow socket.

v2: Emit BTF type, add func_id checks in verifier.c and bpf_trace.c,
remove build check for CONFIG_BPF_JIT
v5: Drop EXPORT_SYMBOL (Martin)

Co-developed-by: Nicolas Rybowski <nicolas.rybowski@tessares.net>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Nicolas Rybowski <nicolas.rybowski@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220519233016.105670-2-mathew.j.martineau@linux.intel.com
---
 include/uapi/linux/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0210f85131b3..56688bee20d9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5172,6 +5172,12 @@ union bpf_attr {
  * 	Return
  * 		Map value associated to *key* on *cpu*, or **NULL** if no entry
  * 		was found or *cpu* is invalid.
+ *
+ * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *mptcp_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or **NULL** otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5370,6 +5376,7 @@ union bpf_attr {
 	FN(ima_file_hash),		\
 	FN(kptr_xchg),			\
 	FN(map_lookup_percpu_elem),     \
+	FN(skc_to_mptcp_sock),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From a13e248ff90e81e9322406c0e618cf2168702f4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:11 +0200
Subject: landlock: Fix landlock_add_rule(2) documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is not mandatory to pass a file descriptor obtained with the O_PATH
flag.  Also, replace rule's accesses with ruleset's accesses.

Link: https://lore.kernel.org/r/20220506160820.524344-2-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/uapi/linux/landlock.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 15c31abb0d76..21c8d58283c9 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -62,8 +62,9 @@ struct landlock_path_beneath_attr {
 	 */
 	__u64 allowed_access;
 	/**
-	 * @parent_fd: File descriptor, open with ``O_PATH``, which identifies
-	 * the parent directory of a file hierarchy, or just a file.
+	 * @parent_fd: File descriptor, preferably opened with ``O_PATH``,
+	 * which identifies the parent directory of a file hierarchy, or just a
+	 * file.
 	 */
 	__s32 parent_fd;
 	/*
-- 
cgit v1.2.3


From b91c3e4ea756b12b7d992529226edce1cfd854d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Fri, 6 May 2022 18:10:57 +0200
Subject: landlock: Add support for file reparenting with
 LANDLOCK_ACCESS_FS_REFER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new LANDLOCK_ACCESS_FS_REFER access right to enable policy writers
to allow sandboxed processes to link and rename files from and to a
specific set of file hierarchies.  This access right should be composed
with LANDLOCK_ACCESS_FS_MAKE_* for the destination of a link or rename,
and with LANDLOCK_ACCESS_FS_REMOVE_* for a source of a rename.  This
lift a Landlock limitation that always denied changing the parent of an
inode.

Renaming or linking to the same directory is still always allowed,
whatever LANDLOCK_ACCESS_FS_REFER is used or not, because it is not
considered a threat to user data.

However, creating multiple links or renaming to a different parent
directory may lead to privilege escalations if not handled properly.
Indeed, we must be sure that the source doesn't gain more privileges by
being accessible from the destination.  This is handled by making sure
that the source hierarchy (including the referenced file or directory
itself) restricts at least as much the destination hierarchy.  If it is
not the case, an EXDEV error is returned, making it potentially possible
for user space to copy the file hierarchy instead of moving or linking
it.

Instead of creating different access rights for the source and the
destination, we choose to make it simple and consistent for users.
Indeed, considering the previous constraint, it would be weird to
require such destination access right to be also granted to the source
(to make it a superset).  Moreover, RENAME_EXCHANGE would also add to
the confusion because of paths being both a source and a destination.

See the provided documentation for additional details.

New tests are provided with a following commit.

Reviewed-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20220506161102.525323-8-mic@digikod.net
---
 include/uapi/linux/landlock.h | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 21c8d58283c9..23df4e0e8ace 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -21,8 +21,14 @@ struct landlock_ruleset_attr {
 	/**
 	 * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_)
 	 * that is handled by this ruleset and should then be forbidden if no
-	 * rule explicitly allow them.  This is needed for backward
-	 * compatibility reasons.
+	 * rule explicitly allow them: it is a deny-by-default list that should
+	 * contain as much Landlock access rights as possible. Indeed, all
+	 * Landlock filesystem access rights that are not part of
+	 * handled_access_fs are allowed.  This is needed for backward
+	 * compatibility reasons.  One exception is the
+	 * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
+	 * handled, but must still be explicitly handled to add new rules with
+	 * this access right.
 	 */
 	__u64 handled_access_fs;
 };
@@ -112,6 +118,22 @@ struct landlock_path_beneath_attr {
  * - %LANDLOCK_ACCESS_FS_MAKE_FIFO: Create (or rename or link) a named pipe.
  * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device.
  * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link.
+ * - %LANDLOCK_ACCESS_FS_REFER: Link or rename a file from or to a different
+ *   directory (i.e. reparent a file hierarchy).  This access right is
+ *   available since the second version of the Landlock ABI.  This is also the
+ *   only access right which is always considered handled by any ruleset in
+ *   such a way that reparenting a file hierarchy is always denied by default.
+ *   To avoid privilege escalation, it is not enough to add a rule with this
+ *   access right.  When linking or renaming a file, the destination directory
+ *   hierarchy must also always have the same or a superset of restrictions of
+ *   the source hierarchy.  If it is not the case, or if the domain doesn't
+ *   handle this access right, such actions are denied by default with errno
+ *   set to EXDEV.  Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access
+ *   right on the destination directory, and renaming also requires a
+ *   LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or
+ *   directory) parent.  Otherwise, such actions are denied with errno set to
+ *   EACCES.  The EACCES errno prevails over EXDEV to let user space
+ *   efficiently deal with an unrecoverable error.
  *
  * .. warning::
  *
@@ -137,6 +159,7 @@ struct landlock_path_beneath_attr {
 #define LANDLOCK_ACCESS_FS_MAKE_FIFO			(1ULL << 10)
 #define LANDLOCK_ACCESS_FS_MAKE_BLOCK			(1ULL << 11)
 #define LANDLOCK_ACCESS_FS_MAKE_SYM			(1ULL << 12)
+#define LANDLOCK_ACCESS_FS_REFER			(1ULL << 13)
 /* clang-format on */
 
 #endif /* _UAPI_LINUX_LANDLOCK_H */
-- 
cgit v1.2.3


From 97e03f521050c092919591e668107b3d69c5f426 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 23 May 2022 14:07:07 -0700
Subject: bpf: Add verifier support for dynptrs

This patch adds the bulk of the verifier work for supporting dynamic
pointers (dynptrs) in bpf.

A bpf_dynptr is opaque to the bpf program. It is a 16-byte structure
defined internally as:

struct bpf_dynptr_kern {
    void *data;
    u32 size;
    u32 offset;
} __aligned(8);

The upper 8 bits of *size* is reserved (it contains extra metadata about
read-only status and dynptr type). Consequently, a dynptr only supports
memory less than 16 MB.

There are different types of dynptrs (eg malloc, ringbuf, ...). In this
patchset, the most basic one, dynptrs to a bpf program's local memory,
is added. For now only local memory that is of reg type PTR_TO_MAP_VALUE
is supported.

In the verifier, dynptr state information will be tracked in stack
slots. When the program passes in an uninitialized dynptr
(ARG_PTR_TO_DYNPTR | MEM_UNINIT), the stack slots corresponding
to the frame pointer where the dynptr resides at are marked
STACK_DYNPTR. For helper functions that take in initialized dynptrs (eg
bpf_dynptr_read + bpf_dynptr_write which are added later in this
patchset), the verifier enforces that the dynptr has been initialized
properly by checking that their corresponding stack slots have been
marked as STACK_DYNPTR.

The 6th patch in this patchset adds test cases that the verifier should
successfully reject, such as for example attempting to use a dynptr
after doing a direct write into it inside the bpf program.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-2-joannelkoong@gmail.com
---
 include/uapi/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 56688bee20d9..610944cb3389 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6528,6 +6528,11 @@ struct bpf_timer {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_dynptr {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
-- 
cgit v1.2.3


From 263ae152e96253f40c2c276faad8629e096b3bad Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 23 May 2022 14:07:08 -0700
Subject: bpf: Add bpf_dynptr_from_mem for local dynptrs

This patch adds a new api bpf_dynptr_from_mem:

long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr);

which initializes a dynptr to point to a bpf program's local memory. For now
only local memory that is of reg type PTR_TO_MAP_VALUE is supported.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-3-joannelkoong@gmail.com
---
 include/uapi/linux/bpf.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 610944cb3389..9be3644457dd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5178,6 +5178,17 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *mptcp_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ *	Description
+ *		Get a dynptr to local memory *data*.
+ *
+ *		*data* must be a ptr to a map value.
+ *		The maximum *size* supported is DYNPTR_MAX_SIZE.
+ *		*flags* is currently unused.
+ *	Return
+ *		0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE,
+ *		-EINVAL if flags is not 0.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5377,6 +5388,7 @@ union bpf_attr {
 	FN(kptr_xchg),			\
 	FN(map_lookup_percpu_elem),     \
 	FN(skc_to_mptcp_sock),		\
+	FN(dynptr_from_mem),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From bc34dee65a65e9c920c420005b8a43f2a721a458 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 23 May 2022 14:07:09 -0700
Subject: bpf: Dynptr support for ring buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, our only way of writing dynamically-sized data into a ring
buffer is through bpf_ringbuf_output but this incurs an extra memcpy
cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra
memcpy, but it can only safely support reservation sizes that are
statically known since the verifier cannot guarantee that the bpf
program won’t access memory outside the reserved space.

The bpf_dynptr abstraction allows for dynamically-sized ring buffer
reservations without the extra memcpy.

There are 3 new APIs:

long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr);
void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags);
void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags);

These closely follow the functionalities of the original ringbuf APIs.
For example, all ringbuffer dynptrs that have been reserved must be
either submitted or discarded before the program exits.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-4-joannelkoong@gmail.com
---
 include/uapi/linux/bpf.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9be3644457dd..081a55540aa5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5189,6 +5189,38 @@ union bpf_attr {
  *	Return
  *		0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE,
  *		-EINVAL if flags is not 0.
+ *
+ * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ *	Description
+ *		Reserve *size* bytes of payload in a ring buffer *ringbuf*
+ *		through the dynptr interface. *flags* must be 0.
+ *
+ *		Please note that a corresponding bpf_ringbuf_submit_dynptr or
+ *		bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the
+ *		reservation fails. This is enforced by the verifier.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
+ *	Description
+ *		Submit reserved ring buffer sample, pointed to by *data*,
+ *		through the dynptr interface. This is a no-op if the dynptr is
+ *		invalid/null.
+ *
+ *		For more information on *flags*, please see
+ *		'bpf_ringbuf_submit'.
+ *	Return
+ *		Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
+ *	Description
+ *		Discard reserved ring buffer sample through the dynptr
+ *		interface. This is a no-op if the dynptr is invalid/null.
+ *
+ *		For more information on *flags*, please see
+ *		'bpf_ringbuf_discard'.
+ *	Return
+ *		Nothing. Always succeeds.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5389,6 +5421,9 @@ union bpf_attr {
 	FN(map_lookup_percpu_elem),     \
 	FN(skc_to_mptcp_sock),		\
 	FN(dynptr_from_mem),		\
+	FN(ringbuf_reserve_dynptr),	\
+	FN(ringbuf_submit_dynptr),	\
+	FN(ringbuf_discard_dynptr),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 13bbbfbea7598ea9f8d9c3d73bf053bb57f9c4b2 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 23 May 2022 14:07:10 -0700
Subject: bpf: Add bpf_dynptr_read and bpf_dynptr_write

This patch adds two helper functions, bpf_dynptr_read and
bpf_dynptr_write:

long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset);

long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len);

The dynptr passed into these functions must be valid dynptrs that have
been initialized.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-5-joannelkoong@gmail.com
---
 include/uapi/linux/bpf.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 081a55540aa5..efe2505650e6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5221,6 +5221,23 @@ union bpf_attr {
  *		'bpf_ringbuf_discard'.
  *	Return
  *		Nothing. Always succeeds.
+ *
+ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset)
+ *	Description
+ *		Read *len* bytes from *src* into *dst*, starting from *offset*
+ *		into *src*.
+ *	Return
+ *		0 on success, -E2BIG if *offset* + *len* exceeds the length
+ *		of *src*'s data, -EINVAL if *src* is an invalid dynptr.
+ *
+ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len)
+ *	Description
+ *		Write *len* bytes from *src* into *dst*, starting from *offset*
+ *		into *dst*.
+ *	Return
+ *		0 on success, -E2BIG if *offset* + *len* exceeds the length
+ *		of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
+ *		is a read-only dynptr.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5424,6 +5441,8 @@ union bpf_attr {
 	FN(ringbuf_reserve_dynptr),	\
 	FN(ringbuf_submit_dynptr),	\
 	FN(ringbuf_discard_dynptr),	\
+	FN(dynptr_read),		\
+	FN(dynptr_write),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 34d4ef5775f776ec4b0d53a02d588bf3195cada6 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 23 May 2022 14:07:11 -0700
Subject: bpf: Add dynptr data slices

This patch adds a new helper function

void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len);

which returns a pointer to the underlying data of a dynptr. *len*
must be a statically known value. The bpf program may access the returned
data slice as a normal buffer (eg can do direct reads and writes), since
the verifier associates the length with the returned pointer, and
enforces that no out of bounds accesses occur.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-6-joannelkoong@gmail.com
---
 include/uapi/linux/bpf.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index efe2505650e6..f4009dbdf62d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5238,6 +5238,17 @@ union bpf_attr {
  *		0 on success, -E2BIG if *offset* + *len* exceeds the length
  *		of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
  *		is a read-only dynptr.
+ *
+ * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
+ *	Description
+ *		Get a pointer to the underlying dynptr data.
+ *
+ *		*len* must be a statically known value. The returned data slice
+ *		is invalidated whenever the dynptr is invalidated.
+ *	Return
+ *		Pointer to the underlying dynptr data, NULL if the dynptr is
+ *		read-only, if the dynptr is invalid, or if the offset and length
+ *		is out of bounds.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5443,6 +5454,7 @@ union bpf_attr {
 	FN(ringbuf_discard_dynptr),	\
 	FN(dynptr_read),		\
 	FN(dynptr_write),		\
+	FN(dynptr_data),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From e5499dd7253c8382d03f687f19a854adcc688357 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Wed, 25 May 2022 14:08:30 +0100
Subject: media: lirc: revert removal of unused feature flags

Commit b2a90f4fcb14 ("media: lirc: remove unused lirc features") removed
feature flags which were never implemented, but they are still used by
the lirc daemon went built from source.

Reinstate these symbols in order not to break the lirc build.

Fixes: b2a90f4fcb14 ("media: lirc: remove unused lirc features")
Link: https://lore.kernel.org/all/a0470450-ecfd-2918-e04a-7b57c1fd7694@kernel.org/
Reported-by: Jiri Slaby <jirislaby@kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/lirc.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index 23b0f2c8ba81..8d7ca7c6af42 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -84,6 +84,13 @@
 #define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
 #define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
 
+/*
+ * Unused features. These features were never implemented, in tree or
+ * out of tree. These definitions are here so not to break the lircd build.
+ */
+#define LIRC_CAN_SET_REC_FILTER		0
+#define LIRC_CAN_NOTIFY_DECODE		0
+
 /*** IOCTL commands for lirc driver ***/
 
 #define LIRC_GET_FEATURES              _IOR('i', 0x00000000, __u32)
-- 
cgit v1.2.3


From caa28984163cb63ea0be4cb8dbf05defdc7303f9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 25 May 2022 09:02:19 -0700
Subject: linux/types.h: reinstate "__bitwise__" macro for user space use

Commit c724c866bb70 ("linux/types.h: remove unnecessary __bitwise__")
was right that there are no users of __bitwise__ in the kernel, but it
turns out there are user space users of it that do expect it.

It is, after all, in the uapi directory, so user space usage is to be
expected.

Instead of reverting the commit completely, let's just clarify the
situation so that it doesn't happen again, and have some in-code
explanations for why that "__bitwise__" still exists.

Reported-by: Jiri Slaby <jirislaby@kernel.org>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Link: https://lore.kernel.org/all/b5c0a68d-8387-4909-beea-f70ab9e6e3d5@kernel.org/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/types.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index c4dc597f3dcf..308433be33c2 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -26,6 +26,9 @@
 #define __bitwise
 #endif
 
+/* The kernel doesn't use this legacy form, but user space does */
+#define __bitwise__ __bitwise
+
 typedef __u16 __bitwise __le16;
 typedef __u16 __bitwise __be16;
 typedef __u32 __bitwise __le32;
-- 
cgit v1.2.3


From a7c41b4687f5902af70cd559806990930c8a307b Mon Sep 17 00:00:00 2001
From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Date: Mon, 30 May 2022 21:15:20 +0800
Subject: io_uring: let IORING_OP_FILES_UPDATE support choosing fixed file
 slots

One big issue with the file registration feature is that it needs user
space apps to maintain free slot info about io_uring's fixed file table,
which really is a burden for development. io_uring now supports choosing
free file slot for user space apps by using IORING_FILE_INDEX_ALLOC flag
in accept, open, and socket operations, but they need the app to use
direct accept or direct open, which not all apps are prepared to use yet.

To support apps that still need real fds, make use of the registration
feature easier. Let IORING_OP_FILES_UPDATE support choosing fixed file
slots, which will store picked fixed files slots in fd array and let cqe
return the number of slots allocated.

Suggested-by: Hao Xu <howeyxu@tencent.com>
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
[axboe: move flag to uapi io_uring header, change goto to break, init]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 53e7dae92e42..776e0278f9dd 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -47,6 +47,7 @@ struct io_uring_sqe {
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
 		__u32		xattr_flags;
+		__u32		close_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -258,6 +259,11 @@ enum io_uring_op {
  */
 #define IORING_ACCEPT_MULTISHOT	(1U << 0)
 
+/*
+ * close flags, store in sqe->close_flags
+ */
+#define IORING_CLOSE_FD_AND_FILE_SLOT	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
cgit v1.2.3


From 3e0b8f529c10037ae0b369fc892e524eae5a5485 Mon Sep 17 00:00:00 2001
From: Arun Ajith S <aajith@arista.com>
Date: Mon, 30 May 2022 10:14:14 +0000
Subject: net/ipv6: Expand and rename accept_unsolicited_na to
 accept_untracked_na

RFC 9131 changes default behaviour of handling RX of NA messages when the
corresponding entry is absent in the neighbour cache. The current
implementation is limited to accept just unsolicited NAs. However, the
RFC is more generic where it also accepts solicited NAs. Both types
should result in adding a STALE entry for this case.

Expand accept_untracked_na behaviour to also accept solicited NAs to
be compliant with the RFC and rename the sysctl knob to
accept_untracked_na.

Fixes: f9a2fb73318e ("net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131")
Signed-off-by: Arun Ajith S <aajith@arista.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220530101414.65439-1-aajith@arista.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 549ddeaf788b..03cdbe798fe3 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -194,7 +194,7 @@ enum {
 	DEVCONF_IOAM6_ID,
 	DEVCONF_IOAM6_ID_WIDE,
 	DEVCONF_NDISC_EVICT_NOCARRIER,
-	DEVCONF_ACCEPT_UNSOLICITED_NA,
+	DEVCONF_ACCEPT_UNTRACKED_NA,
 	DEVCONF_MAX
 };
 
-- 
cgit v1.2.3


From 13b00b135665c92065a27c0c39dd97e0f380bd4f Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Wed, 18 May 2022 16:38:00 +0300
Subject: vdpa: Add support for querying vendor statistics

Allows to read vendor statistics of a vdpa device. The specific
statistics data are received from the upstream driver in the form of an
(attribute name, attribute value) pairs.

An example of statistics for mlx5_vdpa device are:

received_desc - number of descriptors received by the virtqueue
completed_desc - number of descriptors completed by the virtqueue

A descriptor using indirect buffers is still counted as 1. In addition,
N chained descriptors are counted correctly N times as one would expect.

A new callback was added to vdpa_config_ops which provides the means for
the vdpa driver to return statistics results.

The interface allows for reading all the supported virtqueues, including
the control virtqueue if it exists.

Below are some examples taken from mlx5_vdpa which are introduced in the
following patch:

1. Read statistics for the virtqueue at index 1

$ vdpa dev vstats show vdpa-a qidx 1
vdpa-a:
queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836

2. Read statistics for the virtqueue at index 32
$ vdpa dev vstats show vdpa-a qidx 32
vdpa-a:
queue_type control_vq queue_index 32 received_desc 62 completed_desc 62

3. Read statisitics for the virtqueue at index 0 with json output
$ vdpa -j dev vstats show vdpa-a qidx 0
{"vstats":{"vdpa-a":{
"queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
 "name":"completed_desc","value":417548}}}

4. Read statistics for the virtqueue at index 0 with preety json output
$ vdpa -jp dev vstats show vdpa-a qidx 0
{
    "vstats": {
        "vdpa-a": {

            "queue_type": "rx",
            "queue_index": 0,
            "name": "received_desc",
            "value": 417776,
            "name": "completed_desc",
            "value": 417548
        }
    }
}

Signed-off-by: Eli Cohen <elic@nvidia.com>
Message-Id: <20220518133804.1075129-3-elic@nvidia.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vdpa.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index 1061d8d2d09d..25c55cab3d7c 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -18,6 +18,7 @@ enum vdpa_command {
 	VDPA_CMD_DEV_DEL,
 	VDPA_CMD_DEV_GET,		/* can dump */
 	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
+	VDPA_CMD_DEV_VSTATS_GET,
 };
 
 enum vdpa_attr {
@@ -46,6 +47,11 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
 	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
 	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
+
+	VDPA_ATTR_DEV_QUEUE_INDEX,              /* u32 */
+	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
+	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
+
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
 };
-- 
cgit v1.2.3


From 175d493c3c3e09a3abaa843068fae0f0ad42c47e Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Wed, 30 Mar 2022 23:33:41 +0530
Subject: vhost: move the backend feature bits to vhost_types.h

We should store feature bits in vhost_types.h as what has been done
for e.g VHOST_F_LOG_ALL.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Message-Id: <20220330180436.24644-2-gdawar@xilinx.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost.h       | 5 -----
 include/uapi/linux/vhost_types.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 5d99e7c242a2..8f7b4a95d6f9 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -89,11 +89,6 @@
 
 /* Set or get vhost backend capability */
 
-/* Use message type V2 */
-#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
-/* IOTLB can accept batching hints */
-#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
-
 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
 #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
 
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index f7f6a3a28977..76ee7016c501 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -153,4 +153,9 @@ struct vhost_vdpa_iova_range {
 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
 #define VHOST_NET_F_VIRTIO_NET_HDR 27
 
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accept batching hints */
+#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
+
 #endif
-- 
cgit v1.2.3


From 91233ad711866f4e375742d84ef3ed6aab9daa96 Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Wed, 30 Mar 2022 23:33:49 +0530
Subject: vhost: support ASID in IOTLB API

This patches allows userspace to send ASID based IOTLB message to
vhost. This idea is to use the reserved u32 field in the existing V2
IOTLB message. Vhost device should advertise this capability via
VHOST_BACKEND_F_IOTLB_ASID backend feature.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Message-Id: <20220330180436.24644-10-gdawar@xilinx.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost_types.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index 76ee7016c501..634cee485abb 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -87,7 +87,7 @@ struct vhost_msg {
 
 struct vhost_msg_v2 {
 	__u32 type;
-	__u32 reserved;
+	__u32 asid;
 	union {
 		struct vhost_iotlb_msg iotlb;
 		__u8 padding[64];
@@ -157,5 +157,9 @@ struct vhost_vdpa_iova_range {
 #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
 /* IOTLB can accept batching hints */
 #define VHOST_BACKEND_F_IOTLB_BATCH  0x2
+/* IOTLB can accept address space identifier through V2 type of IOTLB
+ * message
+ */
+#define VHOST_BACKEND_F_IOTLB_ASID  0x3
 
 #endif
-- 
cgit v1.2.3


From 3ace88bd37436abc84906312146fe5158a469142 Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Wed, 30 Mar 2022 23:33:51 +0530
Subject: vhost-vdpa: introduce uAPI to get the number of virtqueue groups

Follows the vDPA support for multiple address spaces, this patch
introduce uAPI for the userspace to know the number of virtqueue
groups supported by the vDPA device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Message-Id: <20220330180436.24644-12-gdawar@xilinx.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 8f7b4a95d6f9..61317c61d768 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -145,11 +145,13 @@
 /* Get the valid iova range */
 #define VHOST_VDPA_GET_IOVA_RANGE	_IOR(VHOST_VIRTIO, 0x78, \
 					     struct vhost_vdpa_iova_range)
-
 /* Get the config size */
 #define VHOST_VDPA_GET_CONFIG_SIZE	_IOR(VHOST_VIRTIO, 0x79, __u32)
 
 /* Get the count of all virtqueues */
 #define VHOST_VDPA_GET_VQS_COUNT	_IOR(VHOST_VIRTIO, 0x80, __u32)
 
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM	_IOR(VHOST_VIRTIO, 0x81, __u32)
+
 #endif
-- 
cgit v1.2.3


From a0c95f201170bd559737d3cdc8a950aea62f29c6 Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Wed, 30 Mar 2022 23:33:52 +0530
Subject: vhost-vdpa: introduce uAPI to get the number of address spaces

This patch introduces the uAPI for getting the number of address
spaces supported by this vDPA device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Message-Id: <20220330180436.24644-13-gdawar@xilinx.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 61317c61d768..51322008901a 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -154,4 +154,6 @@
 /* Get the number of virtqueue groups. */
 #define VHOST_VDPA_GET_GROUP_NUM	_IOR(VHOST_VIRTIO, 0x81, __u32)
 
+/* Get the number of address spaces. */
+#define VHOST_VDPA_GET_AS_NUM		_IOR(VHOST_VIRTIO, 0x7A, unsigned int)
 #endif
-- 
cgit v1.2.3


From 2d1fcb7758e49fd9caf150f3c70804b95b2ce80c Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Wed, 30 Mar 2022 23:33:53 +0530
Subject: vhost-vdpa: uAPI to get virtqueue group id

Follows the support for virtqueue group in vDPA. This patches
introduces uAPI to get the virtqueue group ID for a specific virtqueue
in vhost-vdpa.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Message-Id: <20220330180436.24644-14-gdawar@xilinx.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 51322008901a..668914c87f74 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -156,4 +156,12 @@
 
 /* Get the number of address spaces. */
 #define VHOST_VDPA_GET_AS_NUM		_IOR(VHOST_VIRTIO, 0x7A, unsigned int)
+
+/* Get the group for a virtqueue: read index, write group in num,
+ * The virtqueue index is stored in the index field of
+ * vhost_vring_state. The group for this specific virtqueue is
+ * returned via num field of vhost_vring_state.
+ */
+#define VHOST_VDPA_GET_VRING_GROUP	_IOWR(VHOST_VIRTIO, 0x7B,	\
+					      struct vhost_vring_state)
 #endif
-- 
cgit v1.2.3


From 84d7c8fd3aade2fe79313003ed06ede431ec2a6d Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@xilinx.com>
Date: Wed, 30 Mar 2022 23:33:54 +0530
Subject: vhost-vdpa: introduce uAPI to set group ASID

Follows the vDPA support for associating ASID to a specific virtqueue
group. This patch adds a uAPI to support setting them from userspace.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Gautam Dawar <gdawar@xilinx.com>
Message-Id: <20220330180436.24644-15-gdawar@xilinx.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 668914c87f74..cab645d4a645 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -164,4 +164,11 @@
  */
 #define VHOST_VDPA_GET_VRING_GROUP	_IOWR(VHOST_VIRTIO, 0x7B,	\
 					      struct vhost_vring_state)
+/* Set the ASID for a virtqueue group. The group index is stored in
+ * the index field of vhost_vring_state, the ASID associated with this
+ * group is stored at num field of vhost_vring_state.
+ */
+#define VHOST_VDPA_SET_GROUP_ASID	_IOW(VHOST_VIRTIO, 0x7C, \
+					     struct vhost_vring_state)
+
 #endif
-- 
cgit v1.2.3


From 662ce1dc9caf493c309200edbe38d186f1ea20d0 Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang29@zte.com.cn>
Date: Wed, 1 Jun 2022 15:55:25 -0700
Subject: delayacct: track delays from write-protect copy

Delay accounting does not track the delay of write-protect copy.  When
tasks trigger many write-protect copys(include COW and unsharing of
anonymous pages[1]), it may spend a amount of time waiting for them.  To
get the delay of tasks in write-protect copy, could help users to evaluate
the impact of using KSM or fork() or GUP.

Also update tools/accounting/getdelays.c:

    / # ./getdelays -dl -p 231
    print delayacct stats ON
    listen forever
    PID     231

    CPU             count     real total  virtual total    delay total  delay average
                     6247     1859000000     2154070021     1674255063          0.268ms
    IO              count    delay total  delay average
                        0              0              0ms
    SWAP            count    delay total  delay average
                        0              0              0ms
    RECLAIM         count    delay total  delay average
                        0              0              0ms
    THRASHING       count    delay total  delay average
                        0              0              0ms
    COMPACT         count    delay total  delay average
                        3          72758              0ms
    WPCOPY          count    delay total  delay average
                     3635      271567604              0ms

[1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages")

Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Reviewed-by: wangyong <wang.yong12@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/taskstats.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index 736154171489..a7f5b11a8f1b 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION	12
+#define TASKSTATS_VERSION	13
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -194,6 +194,10 @@ struct taskstats {
 	__u64   ac_exe_dev;     /* program binary device ID */
 	__u64   ac_exe_inode;   /* program binary inode number */
 	/* v12 end */
+
+	/* v13: Delay waiting for write-protect copy */
+	__u64    wpcopy_count;
+	__u64    wpcopy_delay_total;
 };
 
 
-- 
cgit v1.2.3


From 8d3398ba2a0d1e25690f830192b7834acab003ec Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 31 May 2022 11:43:45 +0200
Subject: socket: Don't use u8 type in uapi socket.h

Use plain 255 instead, which also avoid introducing an additional header
dependency on <linux/types.h>

Fixes: 26859240e4ee ("txhash: Add socket option to control TX hash rethink behavior")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20220531094345.13801-1-tklauser@distanz.ch
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 51d6bb2f6765..d3fcd3b5ec53 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -31,7 +31,7 @@ struct __kernel_sockaddr_storage {
 
 #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
 
-#define SOCK_TXREHASH_DEFAULT	((u8)-1)
+#define SOCK_TXREHASH_DEFAULT	255
 #define SOCK_TXREHASH_DISABLED	0
 #define SOCK_TXREHASH_ENABLED	1
 
-- 
cgit v1.2.3


From 8cc5b032240ae5220b62c689c20459d3e1825b2d Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Wed, 1 Jun 2022 01:00:17 +0000
Subject: binder: fix sender_euid type in uapi header

The {pid,uid}_t fields of struct binder_transaction were recently
replaced to use kernel types in commit 169adc2b6b3c ("android/binder.h:
add linux/android/binder(fs).h to UAPI compile-test coverage").

However, using __kernel_uid_t here breaks backwards compatibility in
architectures using 16-bits for this type, since glibc and some others
still expect a 32-bit uid_t. Instead, let's use __kernel_uid32_t which
avoids this compatibility problem.

Fixes: 169adc2b6b3c ("android/binder.h: add linux/android/binder(fs).h to UAPI compile-test coverage")
Reported-by: Christopher Ferris <cferris@google.com>
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Acked-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/linux/android/binder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 11157fae8a8e..688bcdaeed53 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -289,7 +289,7 @@ struct binder_transaction_data {
 	/* General information about the transaction. */
 	__u32	        flags;
 	__kernel_pid_t	sender_pid;
-	__kernel_uid_t	sender_euid;
+	__kernel_uid32_t	sender_euid;
 	binder_size_t	data_size;	/* number of bytes of data */
 	binder_size_t	offsets_size;	/* number of bytes of offsets */
 
-- 
cgit v1.2.3


From 08145b087e4481458f6075f3af58021a3cf8a940 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Tue, 31 May 2022 18:04:10 +0800
Subject: LoongArch: Add ELF-related definitions

Add ELF-related definitions for LoongArch, including: EM_LOONGARCH,
KEXEC_ARCH_LOONGARCH, AUDIT_ARCH_LOONGARCH32, AUDIT_ARCH_LOONGARCH64
and NT_LOONGARCH_*.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 include/uapi/linux/audit.h  | 2 ++
 include/uapi/linux/elf-em.h | 1 +
 include/uapi/linux/elf.h    | 5 +++++
 include/uapi/linux/kexec.h  | 1 +
 4 files changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 8eda133ca4c1..7c1dc818b1d5 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -439,6 +439,8 @@ enum {
 #define AUDIT_ARCH_UNICORE	(EM_UNICORE|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_XTENSA	(EM_XTENSA)
+#define AUDIT_ARCH_LOONGARCH32	(EM_LOONGARCH|__AUDIT_ARCH_LE)
+#define AUDIT_ARCH_LOONGARCH64	(EM_LOONGARCH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
 #define AUDIT_PERM_EXEC		1
 #define AUDIT_PERM_WRITE	2
diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index f47e853546fa..ef38c2bc5ab7 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -51,6 +51,7 @@
 #define EM_RISCV	243	/* RISC-V */
 #define EM_BPF		247	/* Linux BPF - in-kernel virtual machine */
 #define EM_CSKY		252	/* C-SKY */
+#define EM_LOONGARCH	258	/* LoongArch */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
 
 /*
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index c4abd09c3da9..2b9f5e9985e5 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -438,6 +438,11 @@ typedef struct elf64_shdr {
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
 #define NT_MIPS_FP_MODE	0x801		/* MIPS floating-point mode */
 #define NT_MIPS_MSA	0x802		/* MIPS SIMD registers */
+#define NT_LOONGARCH_CPUCFG	0xa00	/* LoongArch CPU config registers */
+#define NT_LOONGARCH_CSR	0xa01	/* LoongArch control and status registers */
+#define NT_LOONGARCH_LSX	0xa02	/* LoongArch Loongson SIMD Extension registers */
+#define NT_LOONGARCH_LASX	0xa03	/* LoongArch Loongson Advanced SIMD Extension registers */
+#define NT_LOONGARCH_LBT	0xa04	/* LoongArch Loongson Binary Translation registers */
 
 /* Note types with note name "GNU" */
 #define NT_GNU_PROPERTY_TYPE_0	5
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index fb7e2ef60825..981016e05cfa 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -43,6 +43,7 @@
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
 #define KEXEC_ARCH_AARCH64 (183 << 16)
 #define KEXEC_ARCH_RISCV   (243 << 16)
+#define KEXEC_ARCH_LOONGARCH	(258 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
cgit v1.2.3


From b489a6e5871690735752f8875f411e4d0cd8e5df Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Wed, 8 Jun 2022 18:34:25 +0300
Subject: tls: Rename TLS_INFO_ZC_SENDFILE to TLS_INFO_ZC_TX

To embrace possible future optimizations of TLS, rename zerocopy
sendfile definitions to more generic ones:

* setsockopt: TLS_TX_ZEROCOPY_SENDFILE- > TLS_TX_ZEROCOPY_RO
* sock_diag: TLS_INFO_ZC_SENDFILE -> TLS_INFO_ZC_RO_TX

RO stands for readonly and emphasizes that the application shouldn't
modify the data being transmitted with zerocopy to avoid potential
disconnection.

Fixes: c1318b39c7d3 ("tls: Add opt-in zerocopy mode of sendfile()")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Link: https://lore.kernel.org/r/20220608153425.3151146-1-maximmi@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/tls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index ac39328eabe7..bb8f80812b0b 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -39,7 +39,7 @@
 /* TLS socket options */
 #define TLS_TX			1	/* Set transmit parameters */
 #define TLS_RX			2	/* Set receive parameters */
-#define TLS_TX_ZEROCOPY_SENDFILE	3	/* transmit zerocopy sendfile */
+#define TLS_TX_ZEROCOPY_RO	3	/* TX zerocopy (only sendfile now) */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
@@ -161,7 +161,7 @@ enum {
 	TLS_INFO_CIPHER,
 	TLS_INFO_TXCONF,
 	TLS_INFO_RXCONF,
-	TLS_INFO_ZC_SENDFILE,
+	TLS_INFO_ZC_RO_TX,
 	__TLS_INFO_MAX,
 };
 #define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
-- 
cgit v1.2.3


From d884b6498d2f022098502e106d5a45ab635f2e9a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 14 Jun 2022 17:51:18 +0100
Subject: io_uring: remove IORING_CLOSE_FD_AND_FILE_SLOT

This partially reverts a7c41b4687f5902af70cd559806990930c8a307b

Even though IORING_CLOSE_FD_AND_FILE_SLOT might save cycles for some
users, but it tries to do two things at a time and it's not clear how to
handle errors and what to return in a single result field when one part
fails and another completes well. Kill it for now.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/837c745019b3795941eee4fcfd7de697886d645b.1655224415.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 776e0278f9dd..53e7dae92e42 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -47,7 +47,6 @@ struct io_uring_sqe {
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
 		__u32		xattr_flags;
-		__u32		close_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -259,11 +258,6 @@ enum io_uring_op {
  */
 #define IORING_ACCEPT_MULTISHOT	(1U << 0)
 
-/*
- * close flags, store in sqe->close_flags
- */
-#define IORING_CLOSE_FD_AND_FILE_SLOT	(1U << 0)
-
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
cgit v1.2.3


From 06e445f740c1a0fe5d16b3dff8a4ef18e124e54e Mon Sep 17 00:00:00 2001
From: Ossama Othman <ossama.othman@intel.com>
Date: Mon, 27 Jun 2022 18:02:42 -0700
Subject: mptcp: fix conflict with <netinet/in.h>

Including <linux/mptcp.h> before the C library <netinet/in.h> header
causes symbol redefinition errors at compile-time due to duplicate
declarations and definitions in the <linux/in.h> header included by
<linux/mptcp.h>.

Explicitly include <netinet/in.h> before <linux/in.h> in
<linux/mptcp.h> when __KERNEL__ is not defined so that the C library
compatibility logic in <linux/libc-compat.h> is enabled when including
<linux/mptcp.h> in user space code.

Fixes: c11c5906bc0a ("mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support")
Signed-off-by: Ossama Othman <ossama.othman@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mptcp.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 921963589904..dfe19bf13f4c 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -2,16 +2,17 @@
 #ifndef _UAPI_MPTCP_H
 #define _UAPI_MPTCP_H
 
+#ifndef __KERNEL__
+#include <netinet/in.h>		/* for sockaddr_in and sockaddr_in6	*/
+#include <sys/socket.h>		/* for struct sockaddr			*/
+#endif
+
 #include <linux/const.h>
 #include <linux/types.h>
 #include <linux/in.h>		/* for sockaddr_in			*/
 #include <linux/in6.h>		/* for sockaddr_in6			*/
 #include <linux/socket.h>	/* for sockaddr_storage and sa_family	*/
 
-#ifndef __KERNEL__
-#include <sys/socket.h>		/* for struct sockaddr			*/
-#endif
-
 #define MPTCP_SUBFLOW_FLAG_MCAP_REM		_BITUL(0)
 #define MPTCP_SUBFLOW_FLAG_MCAP_LOC		_BITUL(1)
 #define MPTCP_SUBFLOW_FLAG_JOIN_REM		_BITUL(2)
-- 
cgit v1.2.3


From 29c1ac230e6056b26846c66881802b581a78ad72 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 30 Jun 2022 13:25:57 +0100
Subject: io_uring: keep sendrecv flags in ioprio

We waste a u64 SQE field for flags even though we don't need as many
bits and it can be used for something more useful later. Store io_uring
specific send/recv flags in sqe->ioprio instead of ->addr2.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Fixes: 0455d4ccec54 ("io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg")
[axboe: change comment in io_uring.h as well]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 53e7dae92e42..f10b59d6693e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -244,7 +244,7 @@ enum io_uring_op {
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
 /*
- * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
  *
  * IORING_RECVSEND_POLL_FIRST	If set, instead of first attempting to send
  *				or receive and arm poll if that yields an
-- 
cgit v1.2.3