From f64c4acea51fbe2c08c0b0f48b7f5d1657d7a5e4 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Fri, 10 Sep 2021 01:04:08 +0300
Subject: bpf: Add hardware timestamp field to __sk_buff

BPF programs may want to know hardware timestamps if NIC supports
such timestamping.

Expose this data as hwtstamp field of __sk_buff the same way as
gso_segs/gso_size. This field could be accessed from the same
programs as tstamp field, but it's read-only field. Explicit test
to deny access to padding data is added to bpf_skb_is_valid_access.

Also update BPF_PROG_TEST_RUN tests of the feature.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210909220409.8804-2-vfedorenko@novek.ru
---
 include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 791f31dd0abe..51cfd91cc387 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5284,6 +5284,8 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
+	__u32 :32;		/* Padding, future use. */
+	__u64 hwtstamp;
 };
 
 struct bpf_tunnel_key {
-- 
cgit v1.2.3


From dd47c104533dedb90434a3f142e94a671ac623a6 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr@redhat.com>
Date: Mon, 13 Sep 2021 17:44:15 +0200
Subject: io-wq: provide IO_WQ_* constants for IORING_REGISTER_IOWQ_MAX_WORKERS
 arg items

The items passed in the array pointed by the arg parameter
of IORING_REGISTER_IOWQ_MAX_WORKERS io_uring_register operation
carry certain semantics: they refer to different io-wq worker categories;
provide IO_WQ_* constants in the UAPI, so these categories can be referenced
in the user space code.

Suggested-by: Jens Axboe <axboe@kernel.dk>
Complements: 2e480058ddc21ec5 ("io-wq: provide a way to limit max number of workers")
Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
Link: https://lore.kernel.org/r/20210913154415.GA12890@asgard.redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 59ef35154e3d..b270a07b285e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -317,13 +317,19 @@ enum {
 	IORING_REGISTER_IOWQ_AFF		= 17,
 	IORING_UNREGISTER_IOWQ_AFF		= 18,
 
-	/* set/get max number of workers */
+	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
 
+/* io-wq worker categories */
+enum {
+	IO_WQ_BOUND,
+	IO_WQ_UNBOUND,
+};
+
 /* deprecated, see struct io_uring_rsrc_update */
 struct io_uring_files_update {
 	__u32 offset;
-- 
cgit v1.2.3


From 856c02dbce4f8d6a5644083db22c11750aa11481 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 10 Sep 2021 11:33:51 -0700
Subject: bpf: Introduce helper bpf_get_branch_snapshot

Introduce bpf_get_branch_snapshot(), which allows tracing pogram to get
branch trace from hardware (e.g. Intel LBR). To use the feature, the
user need to create perf_event with proper branch_record filtering
on each cpu, and then calls bpf_get_branch_snapshot in the bpf function.
On Intel CPUs, VLBR event (raw event 0x1b00) can be use for this.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210910183352.3151445-3-songliubraving@fb.com
---
 include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 51cfd91cc387..d21326558d42 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4877,6 +4877,27 @@ union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *	Description
+ *		Get branch trace from hardware engines like Intel LBR. The
+ *		hardware engine is stopped shortly after the helper is
+ *		called. Therefore, the user need to filter branch entries
+ *		based on the actual use case. To capture branch trace
+ *		before the trigger point of the BPF program, the helper
+ *		should be called at the beginning of the BPF program.
+ *
+ *		The data is stored as struct perf_branch_entry into output
+ *		buffer *entries*. *size* is the size of *entries* in bytes.
+ *		*flags* is reserved for now and must be zero.
+ *
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-ENOENT** if architecture does not support branch records.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5076,7 @@ union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(get_branch_snapshot),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 099dd788e31b4f426ef49c2785069804925a84e1 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 13 Sep 2021 14:51:10 -0500
Subject: cifs: remove pathname for file from SPDX header

checkpatch complains about source files with filenames (e.g. in
these cases just below the SPDX header in comments at the top of
various files in fs/cifs). It also is helpful to change this now
so will be less confusing when the parent directory is renamed
e.g. from fs/cifs to fs/smb_client (or fs/smbfs)

Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 include/uapi/linux/cifs/cifs_mount.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h
index 69829205fdb5..8e87d27b0951 100644
--- a/include/uapi/linux/cifs/cifs_mount.h
+++ b/include/uapi/linux/cifs/cifs_mount.h
@@ -1,6 +1,5 @@
 /* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */
 /*
- *   include/uapi/linux/cifs/cifs_mount.h
  *
  *   Author(s): Scott Lovenberg (scott.lovenberg@gmail.com)
  *
-- 
cgit v1.2.3


From b564171ade70570b7f335fa8ed17adb28409e3ac Mon Sep 17 00:00:00 2001
From: Li Li <dualli@google.com>
Date: Fri, 10 Sep 2021 09:42:10 -0700
Subject: binder: fix freeze race

Currently cgroup freezer is used to freeze the application threads, and
BINDER_FREEZE is used to freeze the corresponding binder interface.
There's already a mechanism in ioctl(BINDER_FREEZE) to wait for any
existing transactions to drain out before actually freezing the binder
interface.

But freezing an app requires 2 steps, freezing the binder interface with
ioctl(BINDER_FREEZE) and then freezing the application main threads with
cgroupfs. This is not an atomic operation. The following race issue
might happen.

1) Binder interface is frozen by ioctl(BINDER_FREEZE);
2) Main thread A initiates a new sync binder transaction to process B;
3) Main thread A is frozen by "echo 1 > cgroup.freeze";
4) The response from process B reaches the frozen thread, which will
unexpectedly fail.

This patch provides a mechanism to check if there's any new pending
transaction happening between ioctl(BINDER_FREEZE) and freezing the
main thread. If there's any, the main thread freezing operation can
be rolled back to finish the pending transaction.

Furthermore, the response might reach the binder driver before the
rollback actually happens. That will still cause failed transaction.

As the other process doesn't wait for another response of the response,
the response transaction failure can be fixed by treating the response
transaction like an oneway/async one, allowing it to reach the frozen
thread. And it will be consumed when the thread gets unfrozen later.

NOTE: This patch reuses the existing definition of struct
binder_frozen_status_info but expands the bit assignments of __u32
member sync_recv.

To ensure backward compatibility, bit 0 of sync_recv still indicates
there's an outstanding sync binder transaction. This patch adds new
information to bit 1 of sync_recv, indicating the binder transaction
happens exactly when there's a race.

If an existing userspace app runs on a new kernel, a sync binder call
will set bit 0 of sync_recv so ioctl(BINDER_GET_FROZEN_INFO) still
return the expected value (true). The app just doesn't check bit 1
intentionally so it doesn't have the ability to tell if there's a race.
This behavior is aligned with what happens on an old kernel which
doesn't set bit 1 at all.

A new userspace app can 1) check bit 0 to know if there's a sync binder
transaction happened when being frozen - same as before; and 2) check
bit 1 to know if that sync binder transaction happened exactly when
there's a race - a new information for rollback decision.

the same time, confirmed the pending transactions succeeded.

Fixes: 432ff1e91694 ("binder: BINDER_FREEZE ioctl")
Acked-by: Todd Kjos <tkjos@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Li Li <dualli@google.com>
Test: stress test with apps being frozen and initiating binder calls at
Link: https://lore.kernel.org/r/20210910164210.2282716-2-dualli@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/android/binder.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 20e435fe657a..3246f2c74696 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -225,7 +225,14 @@ struct binder_freeze_info {
 
 struct binder_frozen_status_info {
 	__u32            pid;
+
+	/* process received sync transactions since last frozen
+	 * bit 0: received sync transaction after being frozen
+	 * bit 1: new pending sync transaction during freezing
+	 */
 	__u32            sync_recv;
+
+	/* process received async transactions since last frozen */
 	__u32            async_recv;
 };
 
-- 
cgit v1.2.3


From 844f7eaaed9267ae17d33778efe65548cc940205 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr@redhat.com>
Date: Sun, 12 Sep 2021 14:22:34 +0200
Subject: include/uapi/linux/xfrm.h: Fix XFRM_MSG_MAPPING ABI breakage

Commit 2d151d39073a ("xfrm: Add possibility to set the default to block
if we have no policy") broke ABI by changing the value of the XFRM_MSG_MAPPING
enum item, thus also evading the build-time check
in security/selinux/nlmsgtab.c:selinux_nlmsg_lookup for presence of proper
security permission checks in nlmsg_xfrm_perms.  Fix it by placing
XFRM_MSG_SETDEFAULT/XFRM_MSG_GETDEFAULT to the end of the enum, right before
__XFRM_MSG_MAX, and updating the nlmsg_xfrm_perms accordingly.

Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy")
References: https://lore.kernel.org/netdev/20210901151402.GA2557@altlinux.org/
Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
Acked-by: Antony Antony <antony.antony@secunet.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/xfrm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index b96c1ea7166d..26f456b1f33e 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -213,13 +213,13 @@ enum {
 	XFRM_MSG_GETSPDINFO,
 #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO
 
+	XFRM_MSG_MAPPING,
+#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
+
 	XFRM_MSG_SETDEFAULT,
 #define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT
 	XFRM_MSG_GETDEFAULT,
 #define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT
-
-	XFRM_MSG_MAPPING,
-#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
 	__XFRM_MSG_MAX
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
-- 
cgit v1.2.3


From 059ebe4fe332c5d1c25124166527cdf9fe43a3ce Mon Sep 17 00:00:00 2001
From: Andra Paraschiv <andraprs@amazon.com>
Date: Fri, 27 Aug 2021 18:49:29 +0300
Subject: nitro_enclaves: Add fixes for checkpatch spell check reports

Fix the typos in the words spelling as per the checkpatch script
reports.

Reviewed-by: George-Aurelian Popescu <popegeo@amazon.com>
Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
Link: https://lore.kernel.org/r/20210827154930.40608-7-andraprs@amazon.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/nitro_enclaves.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nitro_enclaves.h b/include/uapi/linux/nitro_enclaves.h
index b945073fe544..e808f5ba124d 100644
--- a/include/uapi/linux/nitro_enclaves.h
+++ b/include/uapi/linux/nitro_enclaves.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
- * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  */
 
 #ifndef _UAPI_LINUX_NITRO_ENCLAVES_H_
@@ -60,7 +60,7 @@
  *
  * Context: Process context.
  * Return:
- * * 0					- Logic succesfully completed.
+ * * 0					- Logic successfully completed.
  * *  -1				- There was a failure in the ioctl logic.
  * On failure, errno is set to:
  * * EFAULT				- copy_from_user() / copy_to_user() failure.
@@ -95,7 +95,7 @@
  *
  * Context: Process context.
  * Return:
- * * 0				- Logic succesfully completed.
+ * * 0				- Logic successfully completed.
  * *  -1			- There was a failure in the ioctl logic.
  * On failure, errno is set to:
  * * EFAULT			- copy_from_user() / copy_to_user() failure.
@@ -118,7 +118,7 @@
  *
  * Context: Process context.
  * Return:
- * * 0					- Logic succesfully completed.
+ * * 0					- Logic successfully completed.
  * *  -1				- There was a failure in the ioctl logic.
  * On failure, errno is set to:
  * * EFAULT				- copy_from_user() failure.
@@ -161,7 +161,7 @@
  *
  * Context: Process context.
  * Return:
- * * 0					- Logic succesfully completed.
+ * * 0					- Logic successfully completed.
  * *  -1				- There was a failure in the ioctl logic.
  * On failure, errno is set to:
  * * EFAULT				- copy_from_user() / copy_to_user() failure.
-- 
cgit v1.2.3


From fa0866625543b4d8b3d026e4e0ef5ec25a453920 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Tue, 14 Sep 2021 10:35:05 +0200
Subject: net/smc: add support for user defined EIDs

SMC-Dv2 allows users to define EIDs which allows to create separate
name spaces enabling users to cluster their SMC-Dv2 connections.
Add support for user defined EIDs and extent the generic netlink
interface so users can add, remove and dump EIDs.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Guvenc Gulce  <guvenc@linux.ibm.com>
Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 0f7f87c70baf..e3728af2832b 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -38,6 +38,9 @@ enum {				/* SMC PNET Table commands */
 #define SMC_GENL_FAMILY_VERSION		1
 
 #define SMC_PCI_ID_STR_LEN		16 /* Max length of pci id string */
+#define SMC_MAX_HOSTNAME_LEN		32 /* Max length of the hostname */
+#define SMC_MAX_UEID			4  /* Max number of user EIDs */
+#define SMC_MAX_EID_LEN			32 /* Max length of an EID */
 
 /* SMC_GENL_FAMILY commands */
 enum {
@@ -49,6 +52,10 @@ enum {
 	SMC_NETLINK_GET_DEV_SMCR,
 	SMC_NETLINK_GET_STATS,
 	SMC_NETLINK_GET_FBACK_STATS,
+	SMC_NETLINK_DUMP_UEID,
+	SMC_NETLINK_ADD_UEID,
+	SMC_NETLINK_REMOVE_UEID,
+	SMC_NETLINK_FLUSH_UEID,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -242,4 +249,12 @@ enum {
 	__SMC_NLA_FBACK_STATS_MAX,
 	SMC_NLA_FBACK_STATS_MAX = __SMC_NLA_FBACK_STATS_MAX - 1
 };
+
+/* SMC_NETLINK_UEID attributes */
+enum {
+	SMC_NLA_EID_TABLE_UNSPEC,
+	SMC_NLA_EID_TABLE_ENTRY,	/* string */
+	__SMC_NLA_EID_TABLE_MAX,
+	SMC_NLA_EID_TABLE_MAX = __SMC_NLA_EID_TABLE_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
-- 
cgit v1.2.3


From 3c572145c24e21c24e1cd0fd168011eaba85da8e Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Tue, 14 Sep 2021 10:35:07 +0200
Subject: net/smc: add generic netlink support for system EID

With SMC-Dv2 users can configure if the static system EID should be used
during CLC handshake, or if only user EIDs are allowed.
Add generic netlink support to enable and disable the system EID, and
to retrieve the system EID and its current enabled state.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Guvenc Gulce  <guvenc@linux.ibm.com>
Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index e3728af2832b..b175bd0165a1 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -56,6 +56,9 @@ enum {
 	SMC_NETLINK_ADD_UEID,
 	SMC_NETLINK_REMOVE_UEID,
 	SMC_NETLINK_FLUSH_UEID,
+	SMC_NETLINK_DUMP_SEID,
+	SMC_NETLINK_ENABLE_SEID,
+	SMC_NETLINK_DISABLE_SEID,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -257,4 +260,13 @@ enum {
 	__SMC_NLA_EID_TABLE_MAX,
 	SMC_NLA_EID_TABLE_MAX = __SMC_NLA_EID_TABLE_MAX - 1
 };
+
+/* SMC_NETLINK_SEID attributes */
+enum {
+	SMC_NLA_SEID_UNSPEC,
+	SMC_NLA_SEID_ENTRY,	/* string */
+	SMC_NLA_SEID_ENABLED,	/* u8 */
+	__SMC_NLA_SEID_TABLE_MAX,
+	SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
-- 
cgit v1.2.3


From 41ced4cd88020c9d4b71ff7c50d020f081efa4a0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:09 -0700
Subject: btf: Change BTF_KIND_* macros to enums

Change BTF_KIND_* macros to enums so they are encoded in dwarf and
appear in vmlinux.h. This will make it easier for bpf programs
to use these constants without macro definitions.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223009.245307-1-yhs@fb.com
---
 include/uapi/linux/btf.h | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index d27b1708efe9..10e401073dd1 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -56,25 +56,28 @@ struct btf_type {
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
-#define BTF_KIND_UNKN		0	/* Unknown	*/
-#define BTF_KIND_INT		1	/* Integer	*/
-#define BTF_KIND_PTR		2	/* Pointer	*/
-#define BTF_KIND_ARRAY		3	/* Array	*/
-#define BTF_KIND_STRUCT		4	/* Struct	*/
-#define BTF_KIND_UNION		5	/* Union	*/
-#define BTF_KIND_ENUM		6	/* Enumeration	*/
-#define BTF_KIND_FWD		7	/* Forward	*/
-#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
-#define BTF_KIND_VOLATILE	9	/* Volatile	*/
-#define BTF_KIND_CONST		10	/* Const	*/
-#define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_FUNC		12	/* Function	*/
-#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
-#define BTF_KIND_VAR		14	/* Variable	*/
-#define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_FLOAT		16	/* Floating point	*/
-#define BTF_KIND_MAX		BTF_KIND_FLOAT
-#define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
+enum {
+	BTF_KIND_UNKN		= 0,	/* Unknown	*/
+	BTF_KIND_INT		= 1,	/* Integer	*/
+	BTF_KIND_PTR		= 2,	/* Pointer	*/
+	BTF_KIND_ARRAY		= 3,	/* Array	*/
+	BTF_KIND_STRUCT		= 4,	/* Struct	*/
+	BTF_KIND_UNION		= 5,	/* Union	*/
+	BTF_KIND_ENUM		= 6,	/* Enumeration	*/
+	BTF_KIND_FWD		= 7,	/* Forward	*/
+	BTF_KIND_TYPEDEF	= 8,	/* Typedef	*/
+	BTF_KIND_VOLATILE	= 9,	/* Volatile	*/
+	BTF_KIND_CONST		= 10,	/* Const	*/
+	BTF_KIND_RESTRICT	= 11,	/* Restrict	*/
+	BTF_KIND_FUNC		= 12,	/* Function	*/
+	BTF_KIND_FUNC_PROTO	= 13,	/* Function Proto	*/
+	BTF_KIND_VAR		= 14,	/* Variable	*/
+	BTF_KIND_DATASEC	= 15,	/* Section	*/
+	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+
+	NR_BTF_KINDS,
+	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
+};
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
-- 
cgit v1.2.3


From b5ea834dde6b6e7f75e51d5f66dac8cd7c97b5ef Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:15 -0700
Subject: bpf: Support for new btf kind BTF_KIND_TAG

LLVM14 added support for a new C attribute ([1])
  __attribute__((btf_tag("arbitrary_str")))
This attribute will be emitted to dwarf ([2]) and pahole
will convert it to BTF. Or for bpf target, this
attribute will be emitted to BTF directly ([3], [4]).
The attribute is intended to provide additional
information for
  - struct/union type or struct/union member
  - static/global variables
  - static/global function or function parameter.

For linux kernel, the btf_tag can be applied
in various places to specify user pointer,
function pre- or post- condition, function
allow/deny in certain context, etc. Such information
will be encoded in vmlinux BTF and can be used
by verifier.

The btf_tag can also be applied to bpf programs
to help global verifiable functions, e.g.,
specifying preconditions, etc.

This patch added basic parsing and checking support
in kernel for new BTF_KIND_TAG kind.

 [1] https://reviews.llvm.org/D106614
 [2] https://reviews.llvm.org/D106621
 [3] https://reviews.llvm.org/D106622
 [4] https://reviews.llvm.org/D109560

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223015.245546-1-yhs@fb.com
---
 include/uapi/linux/btf.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 10e401073dd1..642b6ecb37d7 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO and VAR.
+	 * FUNC, FUNC_PROTO, VAR and TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -74,6 +74,7 @@ enum {
 	BTF_KIND_VAR		= 14,	/* Variable	*/
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+	BTF_KIND_TAG		= 17,	/* Tag */
 
 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -173,4 +174,15 @@ struct btf_var_secinfo {
 	__u32	size;
 };
 
+/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe
+ * additional information related to the tag applied location.
+ * If component_idx == -1, the tag is applied to a struct, union,
+ * variable or function. Otherwise, it is applied to a struct/union
+ * member or a func argument, and component_idx indicates which member
+ * or argument (0 ... vlen-1).
+ */
+struct btf_tag {
+       __s32   component_idx;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */
-- 
cgit v1.2.3


From 67f1e027c27054e641584655020a417eaac9cb3a Mon Sep 17 00:00:00 2001
From: Lukas Prediger <lumip@lumip.de>
Date: Tue, 14 Sep 2021 00:09:42 +0100
Subject: drivers/cdrom: improved ioctl for media change detection

The current implementation of the CDROM_MEDIA_CHANGED ioctl relies on
global state, meaning that only one process can detect a disc change
while the ioctl call will return 0 for other calling processes afterwards
(see bug 213267).

This introduces a new cdrom ioctl, CDROM_TIMED_MEDIA_CHANGE, that
works by maintaining a timestamp of the last detected disc change instead
of a boolean flag: Processes calling this ioctl command can provide
a timestamp of the last disc change known to them and receive
an indication whether the disc was changed since then and the updated
timestamp.

I considered fixing the buggy behavior in the original
CDROM_MEDIA_CHANGED ioctl but that would require maintaining state
for each calling process in the kernel, which seems like a worse
solution than introducing this new ioctl.

Signed-off-by: Lukas Prediger <lumip@lumip.de>
Link: https://lore.kernel.org/all/20210912191207.74449-1-lumip@lumip.de
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210913230942.1188-1-phil@philpotter.co.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/cdrom.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 6c34f6e2f1f7..804ff8d98f71 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -147,6 +147,8 @@
 #define CDROM_NEXT_WRITABLE	0x5394	/* get next writable block */
 #define CDROM_LAST_WRITTEN	0x5395	/* get last block written on disc */
 
+#define CDROM_TIMED_MEDIA_CHANGE   0x5396  /* get the timestamp of the last media change */
+
 /*******************************************************
  * CDROM IOCTL structures
  *******************************************************/
@@ -295,6 +297,23 @@ struct cdrom_generic_command
 	};
 };
 
+/* This struct is used by CDROM_TIMED_MEDIA_CHANGE */
+struct cdrom_timed_media_change_info {
+	__s64	last_media_change;	/* Timestamp of the last detected media
+					 * change in ms. May be set by caller,
+					 * updated upon successful return of
+					 * ioctl.
+					 */
+	__u64	media_flags;		/* Flags returned by ioctl to indicate
+					 * media status.
+					 */
+};
+#define MEDIA_CHANGED_FLAG	0x1	/* Last detected media change was more
+					 * recent than last_media_change set by
+					 * caller.
+					 */
+/* other bits of media_flags available for future use */
+
 /*
  * A CD-ROM physical sector size is 2048, 2052, 2056, 2324, 2332, 2336, 
  * 2340, or 2352 bytes long.  
-- 
cgit v1.2.3


From f8d858e607b2a36808ac6d4218f5f5203d7a7d63 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 14 Sep 2021 16:46:33 +0200
Subject: xfrm: make user policy API complete

>From a userland POV, this API was based on some magic values:
 - dirmask and action were bitfields but meaning of bits
   (XFRM_POL_DEFAULT_*) are not exported;
 - action is confusing, if a bit is set, does it mean drop or accept?

Let's try to simplify this uapi by using explicit field and macros.

Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/xfrm.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 26f456b1f33e..eda0426ec4c2 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -514,9 +514,12 @@ struct xfrm_user_offload {
 #define XFRM_OFFLOAD_INBOUND	2
 
 struct xfrm_userpolicy_default {
-#define XFRM_USERPOLICY_DIRMASK_MAX	(sizeof(__u8) * 8)
-	__u8				dirmask;
-	__u8				action;
+#define XFRM_USERPOLICY_UNSPEC	0
+#define XFRM_USERPOLICY_BLOCK	1
+#define XFRM_USERPOLICY_ACCEPT	2
+	__u8				in;
+	__u8				fwd;
+	__u8				out;
 };
 
 #ifndef __KERNEL__
-- 
cgit v1.2.3


From 336562752acc1a723f9a24b5b8129ae22e0478c6 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Wed, 15 Sep 2021 01:54:00 +0200
Subject: bpf: Update bpf_get_smp_processor_id() documentation

BPF programs run with migration disabled regardless of preemption, as
they are protected by migrate_disable(). Update the uapi documentation
accordingly.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210914235400.59427-1-mcroce@linux.microsoft.com
---
 include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d21326558d42..3e9785f1064a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1629,7 +1629,7 @@ union bpf_attr {
  * u32 bpf_get_smp_processor_id(void)
  * 	Description
  * 		Get the SMP (symmetric multiprocessing) processor id. Note that
- * 		all programs run with preemption disabled, which means that the
+ * 		all programs run with migration disabled, which means that the
  * 		SMP processor id is stable during all the execution of the
  * 		program.
  * 	Return
-- 
cgit v1.2.3


From 227b9644ab16d2ecd98d593edbe15c32c0c9620a Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 16 Sep 2021 11:37:38 +0800
Subject: net/tls: support SM4 GCM/CCM algorithm

The RFC8998 specification defines the use of the ShangMi algorithm
cipher suites in TLS 1.3, and also supports the GCM/CCM mode using
the SM4 algorithm.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tls.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 0d54baea1d8d..5f38be0ec0f3 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -84,6 +84,20 @@
 #define TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE	16
 #define TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE	8
 
+#define TLS_CIPHER_SM4_GCM				55
+#define TLS_CIPHER_SM4_GCM_IV_SIZE			8
+#define TLS_CIPHER_SM4_GCM_KEY_SIZE		16
+#define TLS_CIPHER_SM4_GCM_SALT_SIZE		4
+#define TLS_CIPHER_SM4_GCM_TAG_SIZE		16
+#define TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE		8
+
+#define TLS_CIPHER_SM4_CCM				56
+#define TLS_CIPHER_SM4_CCM_IV_SIZE			8
+#define TLS_CIPHER_SM4_CCM_KEY_SIZE		16
+#define TLS_CIPHER_SM4_CCM_SALT_SIZE		4
+#define TLS_CIPHER_SM4_CCM_TAG_SIZE		16
+#define TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE		8
+
 #define TLS_SET_RECORD_TYPE	1
 #define TLS_GET_RECORD_TYPE	2
 
@@ -124,6 +138,22 @@ struct tls12_crypto_info_chacha20_poly1305 {
 	unsigned char rec_seq[TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE];
 };
 
+struct tls12_crypto_info_sm4_gcm {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_SM4_GCM_IV_SIZE];
+	unsigned char key[TLS_CIPHER_SM4_GCM_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_SM4_GCM_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE];
+};
+
+struct tls12_crypto_info_sm4_ccm {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_SM4_CCM_IV_SIZE];
+	unsigned char key[TLS_CIPHER_SM4_CCM_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_SM4_CCM_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE];
+};
+
 enum {
 	TLS_INFO_UNSPEC,
 	TLS_INFO_VERSION,
-- 
cgit v1.2.3


From 10aceb629e198429c849d5e995c3bb1ba7a9aaa3 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 17 Sep 2021 11:29:05 -0700
Subject: bpf: Add bpf_trace_vprintk helper

This helper is meant to be "bpf_trace_printk, but with proper vararg
support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
array. Write to /sys/kernel/debug/tracing/trace_pipe using the same
mechanism as bpf_trace_printk. The functionality of this helper was
requested in the libbpf issue tracker [0].

[0] Closes: https://github.com/libbpf/libbpf/issues/315

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210917182911.2426606-4-davemarchevsky@fb.com
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3e9785f1064a..98ca79a67937 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4898,6 +4898,16 @@ union bpf_attr {
  *		**-EINVAL** if *flags* is not zero.
  *
  *		**-ENOENT** if architecture does not support branch records.
+ *
+ * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format and can handle more format args as a result.
+ *
+ *		Arguments are to be used as in **bpf_seq_printf**\ () helper.
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5077,6 +5087,7 @@ union bpf_attr {
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From a42effb0b24fcaf49513c2d7d77ef6daa9e32a6f Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 17 Sep 2021 11:29:11 -0700
Subject: bpf: Clarify data_len param in bpf_snprintf and bpf_seq_printf
 comments

Since the data_len in these two functions is a byte len of the preceding
u64 *data array, it must always be a multiple of 8. If this isn't the
case both helpers error out, so let's make the requirement explicit so
users don't need to infer it.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210917182911.2426606-10-davemarchevsky@fb.com
---
 include/uapi/linux/bpf.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 98ca79a67937..6fc59d61937a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4046,7 +4046,7 @@ union bpf_attr {
  * 		arguments. The *data* are a **u64** array and corresponding format string
  * 		values are stored in the array. For strings and pointers where pointees
  * 		are accessed, only the pointer values are stored in the *data* array.
- * 		The *data_len* is the size of *data* in bytes.
+ * 		The *data_len* is the size of *data* in bytes - must be a multiple of 8.
  *
  *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
  *		Reading kernel memory may fail due to either invalid address or
@@ -4751,7 +4751,8 @@ union bpf_attr {
  *		Each format specifier in **fmt** corresponds to one u64 element
  *		in the **data** array. For strings and pointers where pointees
  *		are accessed, only the pointer values are stored in the *data*
- *		array. The *data_len* is the size of *data* in bytes.
+ *		array. The *data_len* is the size of *data* in bytes - must be
+ *		a multiple of 8.
  *
  *		Formats **%s** and **%p{i,I}{4,6}** require to read kernel
  *		memory. Reading kernel memory may fail due to either invalid
-- 
cgit v1.2.3


From 55c42fa7fa331f98062c32799456420930b8bf8c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Sep 2021 16:33:19 -0700
Subject: mptcp: add MPTCP_INFO getsockopt

Its not compatible with multipath-tcp.org kernel one.

1. The out-of-tree implementation defines a different 'struct mptcp_info',
   with embedded __user addresses for additional data such as
   endpoint addresses.

2. Mat Martineau points out that embedded __user addresses doesn't work
with BPF_CGROUP_RUN_PROG_GETSOCKOPT() which assumes that copying in
optsize bytes from optval provides all data that got copied to userspace.

This provides mptcp_info data for the given mptcp socket.

Userspace sets optlen to the size of the structure it expects.
The kernel updates it to contain the number of bytes that it copied.

This allows to append more information to the structure later.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index f66038b9551f..3e9caeddda7e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -193,4 +193,7 @@ enum mptcp_event_attr {
 #define MPTCP_RST_EBADPERF	5
 #define MPTCP_RST_EMIDDLEBOX	6
 
+/* MPTCP socket options */
+#define MPTCP_INFO 1
+
 #endif /* _UAPI_MPTCP_H */
-- 
cgit v1.2.3


From 06f15cee369535a383c9c82ed37a25f0a413f6f1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Sep 2021 16:33:20 -0700
Subject: mptcp: add MPTCP_TCPINFO getsockopt support

Allow users to retrieve TCP_INFO data of all subflows.

Users need to pre-initialize a meta header that has to be
prepended to the data buffer that will be filled with the tcp info data.

The meta header looks like this:

struct mptcp_subflow_data {
 __u32 size_subflow_data;/* size of this structure in userspace */
 __u32 num_subflows;	/* must be 0, set by kernel */
 __u32 size_kernel;	/* must be 0, set by kernel */
 __u32 size_user;	/* size of one element in data[] */
} __attribute__((aligned(8)));

size_subflow_data has to be set to 'sizeof(struct mptcp_subflow_data)'.
This allows to extend mptcp_subflow_data structure later on without
breaking backwards compatibility.

If the structure is extended later on, kernel knows where the
userspace-provided meta header ends, even if userspace uses an older
(smaller) version of the structure.

num_subflows must be set to 0. If the getsockopt request succeeds (return
value is 0), it will be updated to contain the number of active subflows
for the given logical connection.

size_kernel must be set to 0. If the getsockopt request is successful,
it will contain the size of the 'struct tcp_info' as known by the kernel.
This is informational only.

size_user must be set to 'sizeof(struct tcp_info)'.

This allows the kernel to only fill in the space reserved/expected by
userspace.

Example:

struct my_tcp_info {
  struct mptcp_subflow_data d;
  struct tcp_info ti[2];
};
struct my_tcp_info ti;
socklen_t olen;

memset(&ti, 0, sizeof(ti));

ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
ti.d.size_user = sizeof(struct tcp_info);
olen = sizeof(ti);

ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen);
if (ret < 0)
	die_perror("getsockopt MPTCP_TCPINFO");

mptcp_subflow_data.num_subflows is populated with the number of
subflows that exist on the kernel side for the logical mptcp connection.

This allows userspace to re-try with a larger tcp_info array if the number
of subflows was larger than the available space in the ti[] array.

olen has to be set to the number of bytes that userspace has allocated to
receive the kernel data.  It will be updated to contain the real number
bytes that have been copied to by the kernel.

In the above example, if the number if subflows was 1, olen is equal to
'sizeof(struct mptcp_subflow_data) + sizeof(struct tcp_info).
For 2 or more subflows olen is equal to 'sizeof(struct my_tcp_info)'.

If there was more data that could not be copied due to lack of space
in the option buffer, userspace can detect this by checking
mptcp_subflow_data->num_subflows.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 3e9caeddda7e..3f013a513770 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -193,7 +193,15 @@ enum mptcp_event_attr {
 #define MPTCP_RST_EBADPERF	5
 #define MPTCP_RST_EMIDDLEBOX	6
 
+struct mptcp_subflow_data {
+	__u32		size_subflow_data;		/* size of this structure in userspace */
+	__u32		num_subflows;			/* must be 0, set by kernel */
+	__u32		size_kernel;			/* must be 0, set by kernel */
+	__u32		size_user;			/* size of one element in data[] */
+} __attribute__((aligned(8)));
+
 /* MPTCP socket options */
-#define MPTCP_INFO 1
+#define MPTCP_INFO		1
+#define MPTCP_TCPINFO		2
 
 #endif /* _UAPI_MPTCP_H */
-- 
cgit v1.2.3


From c11c5906bc0aba62a78da69035f6b30c6da6d13b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Sep 2021 16:33:21 -0700
Subject: mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support

This retrieves the address pairs of all subflows currently
active for a given mptcp connection.

It re-uses the same meta-header as for MPTCP_TCPINFO.

A new structure is provided to hold the subflow
address data:

struct mptcp_subflow_addrs {
	union {
		__kernel_sa_family_t sa_family;
		struct sockaddr sa_local;
		struct sockaddr_in sin_local;
		struct sockaddr_in6 sin6_local;
		struct sockaddr_storage ss_local;
	};
	union {
		struct sockaddr sa_remote;
		struct sockaddr_in sin_remote;
		struct sockaddr_in6 sin6_remote;
		struct sockaddr_storage ss_remote;
	};
};

Usage of the new getsockopt is very similar to
MPTCP_TCPINFO one.

Userspace allocates a
'struct mptcp_subflow_data', followed by one or
more 'struct mptcp_subflow_addrs', then inits the
mptcp_subflow_data structure as follows:

struct mptcp_subflow_addrs *sf_addr;
struct mptcp_subflow_data *addr;
socklen_t olen = sizeof(*addr) + (8 * sizeof(*sf_addr));

addr = malloc(olen);
addr->size_subflow_data = sizeof(*addr);
addr->num_subflows = 0;
addr->size_kernel = 0;
addr->size_user = sizeof(struct mptcp_subflow_addrs);

sf_addr = (struct mptcp_subflow_addrs *)(addr + 1);

and then retrieves the endpoint addresses via:
ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS,
		 addr, &olen);

If the call succeeds, kernel will have added up to 8
endpoint addresses after the 'mptcp_subflow_data' header.

Userspace needs to re-check 'olen' value to detect how
many bytes have been filled in by the kernel.

Userspace can check addr->num_subflows to discover when
there were more subflows that available data space.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 3f013a513770..c8cc46f80a16 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -4,6 +4,13 @@
 
 #include <linux/const.h>
 #include <linux/types.h>
+#include <linux/in.h>		/* for sockaddr_in			*/
+#include <linux/in6.h>		/* for sockaddr_in6			*/
+#include <linux/socket.h>	/* for sockaddr_storage and sa_family	*/
+
+#ifndef __KERNEL__
+#include <sys/socket.h>		/* for struct sockaddr			*/
+#endif
 
 #define MPTCP_SUBFLOW_FLAG_MCAP_REM		_BITUL(0)
 #define MPTCP_SUBFLOW_FLAG_MCAP_LOC		_BITUL(1)
@@ -200,8 +207,25 @@ struct mptcp_subflow_data {
 	__u32		size_user;			/* size of one element in data[] */
 } __attribute__((aligned(8)));
 
+struct mptcp_subflow_addrs {
+	union {
+		__kernel_sa_family_t sa_family;
+		struct sockaddr sa_local;
+		struct sockaddr_in sin_local;
+		struct sockaddr_in6 sin6_local;
+		struct __kernel_sockaddr_storage ss_local;
+	};
+	union {
+		struct sockaddr sa_remote;
+		struct sockaddr_in sin_remote;
+		struct sockaddr_in6 sin6_remote;
+		struct __kernel_sockaddr_storage ss_remote;
+	};
+};
+
 /* MPTCP socket options */
 #define MPTCP_INFO		1
 #define MPTCP_TCPINFO		2
+#define MPTCP_SUBFLOW_ADDRS	3
 
 #endif /* _UAPI_MPTCP_H */
-- 
cgit v1.2.3


From 5bd2182d58e9d9c6279b7a8a2f9b41add0e7f9cb Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 16 Feb 2021 19:46:48 -0500
Subject: audit,io_uring,io-wq: add some basic audit support to io_uring

This patch adds basic auditing to io_uring operations, regardless of
their context.  This is accomplished by allocating audit_context
structures for the io-wq worker and io_uring SQPOLL kernel threads
as well as explicitly auditing the io_uring operations in
io_issue_sqe().  Individual io_uring operations can bypass auditing
through the "audit_skip" field in the struct io_op_def definition for
the operation; although great care must be taken so that security
relevant io_uring operations do not bypass auditing; please contact
the audit mailing list (see the MAINTAINERS file) with any questions.

The io_uring operations are audited using a new AUDIT_URINGOP record,
an example is shown below:

  type=UNKNOWN[1336] msg=audit(1631800225.981:37289):
    uring_op=19 success=yes exit=0 items=0 ppid=15454 pid=15681
    uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
    subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
    key=(null)

Thanks to Richard Guy Briggs for review and feedback.

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index daa481729e9b..a1997697c8b1 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -118,6 +118,7 @@
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
 #define AUDIT_BPF		1334	/* BPF subsystem */
 #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
+#define AUDIT_URINGOP		1336	/* io_uring operation */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
-- 
cgit v1.2.3


From 67daf270cebcf7aab4b3292b36f9adf357b23ddc Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Sun, 18 Apr 2021 21:54:47 -0400
Subject: audit: add filtering for io_uring records

This patch adds basic audit io_uring filtering, using as much of the
existing audit filtering infrastructure as possible.  In order to do
this we reuse the audit filter rule's syscall mask for the io_uring
operation and we create a new filter for io_uring operations as
AUDIT_FILTER_URING_EXIT/audit_filter_list[7].

Thanks to Richard Guy Briggs for his review, feedback, and work on
the corresponding audit userspace changes.

Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index a1997697c8b1..ecf1edd2affa 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -167,8 +167,9 @@
 #define AUDIT_FILTER_EXCLUDE	0x05	/* Apply rule before record creation */
 #define AUDIT_FILTER_TYPE	AUDIT_FILTER_EXCLUDE /* obsolete misleading naming */
 #define AUDIT_FILTER_FS		0x06	/* Apply rule at __audit_inode_child */
+#define AUDIT_FILTER_URING_EXIT	0x07	/* Apply rule at io_uring op exit */
 
-#define AUDIT_NR_FILTERS	7
+#define AUDIT_NR_FILTERS	8
 
 #define AUDIT_FILTER_PREPEND	0x10	/* Prepend to front of list */
 
-- 
cgit v1.2.3


From 50d7bd38c3aafc4749e05e8d7fcb616979143602 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 17 May 2021 20:01:15 -0700
Subject: stddef: Introduce struct_group() helper macro

Kernel code has a regular need to describe groups of members within a
structure usually when they need to be copied or initialized separately
from the rest of the surrounding structure. The generally accepted design
pattern in C is to use a named sub-struct:

	struct foo {
		int one;
		struct {
			int two;
			int three, four;
		} thing;
		int five;
	};

This would allow for traditional references and sizing:

	memcpy(&dst.thing, &src.thing, sizeof(dst.thing));

However, doing this would mean that referencing struct members enclosed
by such named structs would always require including the sub-struct name
in identifiers:

	do_something(dst.thing.three);

This has tended to be quite inflexible, especially when such groupings
need to be added to established code which causes huge naming churn.
Three workarounds exist in the kernel for this problem, and each have
other negative properties.

To avoid the naming churn, there is a design pattern of adding macro
aliases for the named struct:

	#define f_three thing.three

This ends up polluting the global namespace, and makes it difficult to
search for identifiers.

Another common work-around in kernel code avoids the pollution by avoiding
the named struct entirely, instead identifying the group's boundaries using
either a pair of empty anonymous structs of a pair of zero-element arrays:

	struct foo {
		int one;
		struct { } start;
		int two;
		int three, four;
		struct { } finish;
		int five;
	};

	struct foo {
		int one;
		int start[0];
		int two;
		int three, four;
		int finish[0];
		int five;
	};

This allows code to avoid needing to use a sub-struct named for member
references within the surrounding structure, but loses the benefits of
being able to actually use such a struct, making it rather fragile. Using
these requires open-coded calculation of sizes and offsets. The efforts
made to avoid common mistakes include lots of comments, or adding various
BUILD_BUG_ON()s. Such code is left with no way for the compiler to reason
about the boundaries (e.g. the "start" object looks like it's 0 bytes
in length), making bounds checking depend on open-coded calculations:

	if (length > offsetof(struct foo, finish) -
		     offsetof(struct foo, start))
		return -EINVAL;
	memcpy(&dst.start, &src.start, offsetof(struct foo, finish) -
				       offsetof(struct foo, start));

However, the vast majority of places in the kernel that operate on
groups of members do so without any identification of the grouping,
relying either on comments or implicit knowledge of the struct contents,
which is even harder for the compiler to reason about, and results in
even more fragile manual sizing, usually depending on member locations
outside of the region (e.g. to copy "two" and "three", use the start of
"four" to find the size):

	BUILD_BUG_ON((offsetof(struct foo, four) <
		      offsetof(struct foo, two)) ||
		     (offsetof(struct foo, four) <
		      offsetof(struct foo, three));
	if (length > offsetof(struct foo, four) -
		     offsetof(struct foo, two))
		return -EINVAL;
	memcpy(&dst.two, &src.two, length);

In order to have a regular programmatic way to describe a struct
region that can be used for references and sizing, can be examined for
bounds checking, avoids forcing the use of intermediate identifiers,
and avoids polluting the global namespace, introduce the struct_group()
macro. This macro wraps the member declarations to create an anonymous
union of an anonymous struct (no intermediate name) and a named struct
(for references and sizing):

	struct foo {
		int one;
		struct_group(thing,
			int two;
			int three, four;
		);
		int five;
	};

	if (length > sizeof(src.thing))
		return -EINVAL;
	memcpy(&dst.thing, &src.thing, length);
	do_something(dst.three);

There are some rare cases where the resulting struct_group() needs
attributes added, so struct_group_attr() is also introduced to allow
for specifying struct attributes (e.g. __align(x) or __packed).
Additionally, there are places where such declarations would like to
have the struct be tagged, so struct_group_tagged() is added.

Given there is a need for a handful of UAPI uses too, the underlying
__struct_group() macro has been defined in UAPI so it can be used there
too.

To avoid confusing scripts/kernel-doc, hide the macro from its struct
parsing.

Co-developed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Keith Packard <keithp@keithp.com>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/lkml/20210728023217.GC35706@embeddedor
Enhanced-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/lkml/41183a98-bdb9-4ad6-7eab-5a7292a6df84@rasmusvillemoes.dk
Enhanced-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/lkml/1d9a2e6df2a9a35b2cdd50a9a68cac5991e7e5f0.camel@intel.com
Enhanced-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://lore.kernel.org/lkml/YQKa76A6XuFqgM03@phenom.ffwll.local
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/stddef.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index ee8220f8dcf5..610204f7c275 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -4,3 +4,24 @@
 #ifndef __always_inline
 #define __always_inline inline
 #endif
+
+/**
+ * __struct_group() - Create a mirrored named and anonyomous struct
+ *
+ * @TAG: The tag name for the named sub-struct (usually empty)
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @ATTRS: Any struct attributes (usually empty)
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical layout
+ * and size: one anonymous and one named. The former's members can be used
+ * normally without sub-struct naming, and the latter can be used to
+ * reason about the start, end, and size of the group of struct members.
+ * The named struct can also be explicitly tagged for layer reuse, as well
+ * as both having struct attributes appended.
+ */
+#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
+	union { \
+		struct { MEMBERS } ATTRS; \
+		struct TAG { MEMBERS } ATTRS NAME; \
+	}
-- 
cgit v1.2.3


From e306784a8de08868d0ecbf78dd42a0051d0e14ce Mon Sep 17 00:00:00 2001
From: Subrat Mishra <subratm@codeaurora.org>
Date: Wed, 15 Sep 2021 11:22:23 +0530
Subject: cfg80211: AP mode driver offload for FILS association crypto

Add a driver FILS crypto offload extended capability flag to indicate
that the driver running in AP mode is capable of handling encryption
and decryption of (Re)Association request and response frames.
Add a command to set FILS AAD data to driver.

This feature is supported on drivers running in AP mode only.
This extended capability is exchanged with hostapd during cfg80211
init. If the driver indicates this capability, then before sending the
Authentication response frame, hostapd sets FILS AAD data to the
driver. This allows the driver to decrypt (Re)Association Request
frame and encrypt (Re)Association Response frame. FILS Key derivation
will still be done in hostapd.

Signed-off-by: Subrat Mishra <subratm@codeaurora.org>
Link: https://lore.kernel.org/r/1631685143-13530-1-git-send-email-subratm@codeaurora.org
[fix whitespace]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c2efea98e060..e89bbf856228 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -300,6 +300,29 @@
  * the interface goes down.
  */
 
+/**
+ * DOC: FILS shared key crypto offload
+ *
+ * This feature is applicable to drivers running in AP mode.
+ *
+ * FILS shared key crypto offload can be advertised by drivers by setting
+ * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD flag. The drivers that support
+ * FILS shared key crypto offload should be able to encrypt and decrypt
+ * association frames for FILS shared key authentication as per IEEE 802.11ai.
+ * With this capability, for FILS key derivation, drivers depend on userspace.
+ *
+ * After FILS key derivation, userspace shares the FILS AAD details with the
+ * driver and the driver stores the same to use in decryption of association
+ * request and in encryption of association response. The below parameters
+ * should be given to the driver in %NL80211_CMD_SET_FILS_AAD.
+ *	%NL80211_ATTR_MAC - STA MAC address, used for storing FILS AAD per STA
+ *	%NL80211_ATTR_FILS_KEK - Used for encryption or decryption
+ *	%NL80211_ATTR_FILS_NONCES - Used for encryption or decryption
+ *			(STA Nonce 16 bytes followed by AP Nonce 16 bytes)
+ *
+ * Once the association is done, the driver cleans the FILS AAD data.
+ */
+
 /**
  * enum nl80211_commands - supported nl80211 commands
  *
@@ -1200,6 +1223,12 @@
  * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change
  *	has completed
  *
+ * @NL80211_CMD_SET_FILS_AAD: Set FILS AAD data to the driver using -
+ *	&NL80211_ATTR_MAC - for STA MAC address
+ *	&NL80211_ATTR_FILS_KEK - for KEK
+ *	&NL80211_ATTR_FILS_NONCES - for FILS Nonces
+ *		(STA Nonce 16 bytes followed by AP Nonce 16 bytes)
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1440,6 +1469,8 @@ enum nl80211_commands {
 	NL80211_CMD_COLOR_CHANGE_ABORTED,
 	NL80211_CMD_COLOR_CHANGE_COMPLETED,
 
+	NL80211_CMD_SET_FILS_AAD,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -5995,6 +6026,11 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision
  *	detection and change announcemnts.
  *
+ * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD: Driver running in AP mode supports
+ *	FILS encryption and decryption for (Re)Association Request and Response
+ *	frames. Userspace has to share FILS AAD details to the driver by using
+ *	@NL80211_CMD_SET_FILS_AAD.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -6060,6 +6096,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_SECURE_RTT,
 	NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE,
 	NL80211_EXT_FEATURE_BSS_COLOR,
+	NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From dc1e3cb8da8b414b37208b2fb6755fef8122504b Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Wed, 15 Sep 2021 19:54:34 -0700
Subject: nl80211: MBSSID and EMA support in AP mode

Add new attributes to configure support for multiple BSSID
and advanced multi-BSSID advertisements (EMA) in AP mode.

- NL80211_ATTR_MBSSID_CONFIG used for per interface configuration.
- NL80211_ATTR_MBSSID_ELEMS used to MBSSID elements for beacons.

Memory for the elements is allocated dynamically. This change frees
the memory in existing functions which call nl80211_parse_beacon(),
a comment is added to indicate the new references to do the same.

Signed-off-by: John Crispin <john@phrozen.org>
Co-developed-by: Aloka Dixit <alokad@codeaurora.org>
Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/20210916025437.29138-2-alokad@codeaurora.org
[don't leave ERR_PTR hanging around]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 76 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e89bbf856228..eda608b1eb09 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -360,7 +360,10 @@
  * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes
  *	%NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from
  *	userspace to request deletion of a virtual interface, then requires
- *	attribute %NL80211_ATTR_IFINDEX.
+ *	attribute %NL80211_ATTR_IFINDEX. If multiple BSSID advertisements are
+ *	enabled using %NL80211_ATTR_MBSSID_CONFIG, %NL80211_ATTR_MBSSID_ELEMS,
+ *	and if this command is used for the transmitting interface, then all
+ *	the non-transmitting interfaces are deleted as well.
  *
  * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified
  *	by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC.
@@ -2624,6 +2627,18 @@ enum nl80211_commands {
  * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE
  *	information for the time while performing a color switch.
  *
+ * @NL80211_ATTR_MBSSID_CONFIG: Nested attribute for multiple BSSID
+ *	advertisements (MBSSID) parameters in AP mode.
+ *	Kernel uses this attribute to indicate the driver's support for MBSSID
+ *	and enhanced multi-BSSID advertisements (EMA AP) to the userspace.
+ *	Userspace should use this attribute to configure per interface MBSSID
+ *	parameters.
+ *	See &enum nl80211_mbssid_config_attributes for details.
+ *
+ * @NL80211_ATTR_MBSSID_ELEMS: Nested parameter to pass multiple BSSID elements.
+ *	Mandatory parameter for the transmitting interface to enable MBSSID.
+ *	Optional for the non-transmitting interfaces.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3127,6 +3142,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_COLOR_CHANGE_COLOR,
 	NL80211_ATTR_COLOR_CHANGE_ELEMS,
 
+	NL80211_ATTR_MBSSID_CONFIG,
+	NL80211_ATTR_MBSSID_ELEMS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -7386,4 +7404,60 @@ enum nl80211_sar_specs_attrs {
 	NL80211_SAR_ATTR_SPECS_MAX = __NL80211_SAR_ATTR_SPECS_LAST - 1,
 };
 
+/**
+ * enum nl80211_mbssid_config_attributes - multiple BSSID (MBSSID) and enhanced
+ * multi-BSSID advertisements (EMA) in AP mode.
+ * Kernel uses some of these attributes to advertise driver's support for
+ * MBSSID and EMA.
+ * Remaining attributes should be used by the userspace to configure the
+ * features.
+ *
+ * @__NL80211_MBSSID_CONFIG_ATTR_INVALID: Invalid
+ *
+ * @NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES: Used by the kernel to advertise
+ *	the maximum number of MBSSID interfaces supported by the driver.
+ *	Driver should indicate MBSSID support by setting
+ *	wiphy->mbssid_max_interfaces to a value more than or equal to 2.
+ *
+ * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel
+ *	to advertise the maximum profile periodicity supported by the driver
+ *	if EMA is enabled. Driver should indicate EMA support to the userspace
+ *	by setting wiphy->mbssid_max_ema_profile_periodicity to
+ *	a non-zero value.
+ *
+ * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of
+ *	this BSS (u8) in the multiple BSSID set.
+ *	Value must be set to 0 for the transmitting interface and non-zero for
+ *	all non-transmitting interfaces. The userspace will be responsible
+ *	for using unique indices for the interfaces.
+ *	Range: 0 to wiphy->mbssid_max_interfaces-1.
+ *
+ * @NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX: Mandatory parameter for
+ *	a non-transmitted profile which provides the interface index (u32) of
+ *	the transmitted profile. The value must match one of the interface
+ *	indices advertised by the kernel. Optional if the interface being set up
+ *	is the transmitting one, however, if provided then the value must match
+ *	the interface index of the same.
+ *
+ * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature.
+ *	Setting this flag is permitted only if the driver advertises EMA support
+ *	by setting wiphy->mbssid_max_ema_profile_periodicity to non-zero.
+ *
+ * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal
+ * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute
+ */
+enum nl80211_mbssid_config_attributes {
+	__NL80211_MBSSID_CONFIG_ATTR_INVALID,
+
+	NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES,
+	NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY,
+	NL80211_MBSSID_CONFIG_ATTR_INDEX,
+	NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX,
+	NL80211_MBSSID_CONFIG_ATTR_EMA,
+
+	/* keep last */
+	__NL80211_MBSSID_CONFIG_ATTR_LAST,
+	NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1,
+};
+
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From 61bc346ce64a3864ac55f5d18bdc1572cda4fb18 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr@redhat.com>
Date: Wed, 25 Aug 2021 19:06:13 +0200
Subject: uapi/linux/prctl: provide macro definitions for the PR_SCHED_CORE
 type argument

Commit 7ac592aa35a684ff ("sched: prctl() core-scheduling interface")
made use of enum pid_type in prctl's arg4; this type and the associated
enumeration definitions are not exposed to userspace.  Christian
has suggested to provide additional macro definitions that convey
the meaning of the type argument more in alignment with its actual
usage, and this patch does exactly that.

Link: https://lore.kernel.org/r/20210825170613.GA3884@asgard.redhat.com
Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
Complements: 7ac592aa35a684ff ("sched: prctl() core-scheduling interface")
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/uapi/linux/prctl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 43bd7f713c39..b2e4dc1449b9 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -268,5 +268,8 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SHARE_TO		2 /* push core_sched cookie to pid */
 # define PR_SCHED_CORE_SHARE_FROM	3 /* pull core_sched cookie to pid */
 # define PR_SCHED_CORE_MAX		4
+# define PR_SCHED_CORE_SCOPE_THREAD		0
+# define PR_SCHED_CORE_SCOPE_THREAD_GROUP	1
+# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP	2
 
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From b84f60a307f09debe30cc171b0f0a5c36797cf67 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 5 Aug 2021 04:47:49 +0200
Subject: media: Rename V4L2_PIX_FMT_SUNXI_TILED_NV12 to
 V4L2_PIX_FMT_NV12_32L32

The V4L2_PIX_FMT_SUNXI_TILED_NV12 format is actually a fairly
common NV12 tiled format, with 32x32 linear tiles. Rename the format
and move its documentation together with the other tiled NV12 formats.

Keep V4L2_PIX_FMT_SUNXI_TILED_NV12 for application compatibility.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9260791b8438..0188cd39468f 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -627,6 +627,9 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_YUV444M v4l2_fourcc('Y', 'M', '2', '4') /* 24  YUV444 planar */
 #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24  YVU444 planar */
 
+/* Tiled YUV formats */
+#define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12  Y/CbCr 4:2:0 32x32 tiles */
+
 /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */
 #define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B', 'A', '8', '1') /*  8  BGBG.. GRGR.. */
 #define V4L2_PIX_FMT_SGBRG8  v4l2_fourcc('G', 'B', 'R', 'G') /*  8  GBGB.. RGRG.. */
@@ -734,7 +737,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_Z16      v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */
 #define V4L2_PIX_FMT_MT21C    v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode  */
 #define V4L2_PIX_FMT_INZI     v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */
-#define V4L2_PIX_FMT_SUNXI_TILED_NV12 v4l2_fourcc('S', 'T', '1', '2') /* Sunxi Tiled NV12 Format */
 #define V4L2_PIX_FMT_CNF4     v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */
 #define V4L2_PIX_FMT_HI240    v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */
 
@@ -2615,4 +2617,9 @@ struct v4l2_create_buffers {
 
 #define BASE_VIDIOC_PRIVATE	192		/* 192-255 are private */
 
+/* Deprecated definitions kept for backwards compatibility */
+#ifndef __KERNEL__
+#define V4L2_PIX_FMT_SUNXI_TILED_NV12 V4L2_PIX_FMT_NV12_32L32
+#endif
+
 #endif /* _UAPI__LINUX_VIDEODEV2_H */
-- 
cgit v1.2.3


From 78eee7b5f110c9884c8ffd1dfcdd9c29296f3e43 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 5 Aug 2021 04:47:50 +0200
Subject: media: Rename V4L2_PIX_FMT_HM12 to V4L2_PIX_FMT_NV12_16L16

The V4L2_PIX_FMT_HM12 format is actually a simple NV12 tiled format,
with 16x16 linear tiles. Rename the format and move its documentation
together with the other tiled NV12 formats.

Keep V4L2_PIX_FMT_HM12 for application compatibility.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 0188cd39468f..40fec00ce73a 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -601,7 +601,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_NV61    v4l2_fourcc('N', 'V', '6', '1') /* 16  Y/CrCb 4:2:2  */
 #define V4L2_PIX_FMT_NV24    v4l2_fourcc('N', 'V', '2', '4') /* 24  Y/CbCr 4:4:4  */
 #define V4L2_PIX_FMT_NV42    v4l2_fourcc('N', 'V', '4', '2') /* 24  Y/CrCb 4:4:4  */
-#define V4L2_PIX_FMT_HM12    v4l2_fourcc('H', 'M', '1', '2') /*  8  YUV 4:2:0 16x16 macroblocks */
 
 /* two non contiguous planes - one Y, one Cr + Cb interleaved  */
 #define V4L2_PIX_FMT_NV12M   v4l2_fourcc('N', 'M', '1', '2') /* 12  Y/CbCr 4:2:0  */
@@ -628,6 +627,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24  YVU444 planar */
 
 /* Tiled YUV formats */
+#define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 16x16 tiles */
 #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12  Y/CbCr 4:2:0 32x32 tiles */
 
 /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */
@@ -2619,6 +2619,7 @@ struct v4l2_create_buffers {
 
 /* Deprecated definitions kept for backwards compatibility */
 #ifndef __KERNEL__
+#define V4L2_PIX_FMT_HM12 V4L2_PIX_FMT_NV12_16L16
 #define V4L2_PIX_FMT_SUNXI_TILED_NV12 V4L2_PIX_FMT_NV12_32L32
 #endif
 
-- 
cgit v1.2.3


From 683f71ebb35d9223b4a22488e2eaffac30af104d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 5 Aug 2021 04:47:51 +0200
Subject: media: Add NV12_4L4 tiled format

This format is produced by VeriSilicon Hantro G2 and VC8000D cores.
It is a simple 4x4 tiling layout in a linear way.

The pixel format was introduced by GStreamer using FourCC VT12,
so let's stick to it.

Link: https://gstreamer.freedesktop.org/documentation/video/video-format.html

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 40fec00ce73a..56003a5467fc 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -627,6 +627,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24  YVU444 planar */
 
 /* Tiled YUV formats */
+#define V4L2_PIX_FMT_NV12_4L4 v4l2_fourcc('V', 'T', '1', '2')   /* 12  Y/CbCr 4:2:0  4x4 tiles */
 #define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 16x16 tiles */
 #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12  Y/CbCr 4:2:0 32x32 tiles */
 
-- 
cgit v1.2.3


From 75b8f8f2646ccaf085a87983329be8e47bd8b6bc Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 5 Aug 2021 04:47:52 +0200
Subject: media: Clean V4L2_PIX_FMT_NV12MT documentation

Add more information about V4L2_PIX_FMT_NV12MT and
V4L2_PIX_FMT_NV12M_16X16, so it's clearer for driver authors and users.

Also, group the two pixel formats with the other tiled formats,
for clarity.

Unlike the recently introduced tiled formats (V4L2_PIX_FMT_NV12_4L4, etc)
these formats have remained Samsung-specific until now. Therefore, and
although the NV12MT and NV12MT_16X16 nomenclatures are less clear, we are
keeping them as-is.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 56003a5467fc..58392dcd3bf5 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -607,8 +607,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_NV21M   v4l2_fourcc('N', 'M', '2', '1') /* 21  Y/CrCb 4:2:0  */
 #define V4L2_PIX_FMT_NV16M   v4l2_fourcc('N', 'M', '1', '6') /* 16  Y/CbCr 4:2:2  */
 #define V4L2_PIX_FMT_NV61M   v4l2_fourcc('N', 'M', '6', '1') /* 16  Y/CrCb 4:2:2  */
-#define V4L2_PIX_FMT_NV12MT  v4l2_fourcc('T', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 64x32 macroblocks */
-#define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 16x16 macroblocks */
 
 /* three planes - Y Cb, Cr */
 #define V4L2_PIX_FMT_YUV410  v4l2_fourcc('Y', 'U', 'V', '9') /*  9  YUV 4:1:0     */
@@ -631,6 +629,10 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 16x16 tiles */
 #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12  Y/CbCr 4:2:0 32x32 tiles */
 
+/* Tiled YUV formats, non contiguous planes */
+#define V4L2_PIX_FMT_NV12MT  v4l2_fourcc('T', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 64x32 tiles */
+#define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12  Y/CbCr 4:2:0 16x16 tiles */
+
 /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */
 #define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B', 'A', '8', '1') /*  8  BGBG.. GRGR.. */
 #define V4L2_PIX_FMT_SGBRG8  v4l2_fourcc('G', 'B', 'R', 'G') /*  8  GBGB.. RGRG.. */
-- 
cgit v1.2.3


From ffe5350c016a0de8ac77d32d9d4ea378cc9ff402 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@chromium.org>
Date: Fri, 6 Aug 2021 06:15:25 +0200
Subject: media: add Mediatek's MM21 format

Add Mediatek's non-compressed 8 bit block video mode. This format is
produced by the MT8183 codec and can be converted to a non-proprietary
format by the MDP3 component.

Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 58392dcd3bf5..5cc9545feb40 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -739,6 +739,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_Y12I     v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */
 #define V4L2_PIX_FMT_Z16      v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */
 #define V4L2_PIX_FMT_MT21C    v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode  */
+#define V4L2_PIX_FMT_MM21     v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */
 #define V4L2_PIX_FMT_INZI     v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */
 #define V4L2_PIX_FMT_CNF4     v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */
 #define V4L2_PIX_FMT_HI240    v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */
-- 
cgit v1.2.3


From a9c80593ff80ddb7c6496624e5384e1ea3460a72 Mon Sep 17 00:00:00 2001
From: David Plowman <david.plowman@raspberrypi.com>
Date: Mon, 16 Aug 2021 13:39:08 +0200
Subject: media: v4l2-ctrls: Add V4L2_CID_NOTIFY_GAINS control

We add a new control V4L2_CID_NOTIFY_GAINS which allows the sensor to
be notified what gains will be applied to the different colour
channels by subsequent processing (such as by an ISP), even though the
sensor will not apply any of these gains itself.

For Bayer sensors this will be an array control taking 4 values which
are the 4 gains arranged in the fixed order B, Gb, Gr and R,
irrespective of the exact Bayer order of the sensor itself. The use of
an array makes it straightforward to extend this control to non-Bayer
sensors (for example, sensors with an RGBW pattern) in future.

The units are in all cases linear with the default value indicating a
gain of exactly 1.0. For example, if the default value were reported as
128 then the value 192 would represent a gain of exactly 1.5.

Signed-off-by: David Plowman <david.plowman@raspberrypi.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 5532b5f68493..133e20444939 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -1118,6 +1118,7 @@ enum v4l2_jpeg_chroma_subsampling {
 #define V4L2_CID_TEST_PATTERN_BLUE		(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 6)
 #define V4L2_CID_TEST_PATTERN_GREENB		(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 7)
 #define V4L2_CID_UNIT_CELL_SIZE			(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 8)
+#define V4L2_CID_NOTIFY_GAINS			(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 9)
 
 
 /* Image processing controls */
-- 
cgit v1.2.3


From 965c1e0bfeb66888fb000540c1fc4e8b55533d9c Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Thu, 9 Sep 2021 13:24:27 +0200
Subject: media: videobuf2: add V4L2_MEMORY_FLAG_NON_COHERENT flag

By setting or clearing the V4L2_MEMORY_FLAG_NON_COHERENT flag
user-space should be able to hint vb2 that either non-coherent
(if supported) or coherent memory should be used for the buffer
allocation.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 5cc9545feb40..9b7032abb2c7 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -962,6 +962,8 @@ struct v4l2_requestbuffers {
 	__u32			reserved[1];
 };
 
+#define V4L2_MEMORY_FLAG_NON_COHERENT			(1 << 0)
+
 /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */
 #define V4L2_BUF_CAP_SUPPORTS_MMAP			(1 << 0)
 #define V4L2_BUF_CAP_SUPPORTS_USERPTR			(1 << 1)
-- 
cgit v1.2.3


From c0acf9cfeee061f041fab778dbdcb34b6ca5e2e7 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Thu, 9 Sep 2021 13:24:29 +0200
Subject: media: videobuf2: handle V4L2_MEMORY_FLAG_NON_COHERENT flag

This patch lets user-space request a non-coherent memory
allocation during CREATE_BUFS and REQBUFS ioctl calls.

= CREATE_BUFS

  struct v4l2_create_buffers has seven 4-byte reserved areas,
  so reserved[0] is renamed to ->flags. The struct, thus, now
  has six reserved 4-byte regions.

= CREATE_BUFS32

  struct v4l2_create_buffers32 has seven 4-byte reserved areas,
  so reserved[0] is renamed to ->flags. The struct, thus, now
  has six reserved 4-byte regions.

= REQBUFS

 We use one byte of a 4 byte ->reserved[1] member of struct
 v4l2_requestbuffers. The struct, thus, now has reserved 3 bytes.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9b7032abb2c7..f118fe7a9f58 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -959,7 +959,8 @@ struct v4l2_requestbuffers {
 	__u32			type;		/* enum v4l2_buf_type */
 	__u32			memory;		/* enum v4l2_memory */
 	__u32			capabilities;
-	__u32			reserved[1];
+	__u8			flags;
+	__u8			reserved[3];
 };
 
 #define V4L2_MEMORY_FLAG_NON_COHERENT			(1 << 0)
@@ -2507,6 +2508,9 @@ struct v4l2_dbg_chip_info {
  * @memory:	enum v4l2_memory; buffer memory type
  * @format:	frame format, for which buffers are requested
  * @capabilities: capabilities of this buffer type.
+ * @flags:	additional buffer management attributes (ignored unless the
+ *		queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability
+ *		and configured for MMAP streaming I/O).
  * @reserved:	future extensions
  */
 struct v4l2_create_buffers {
@@ -2515,7 +2519,8 @@ struct v4l2_create_buffers {
 	__u32			memory;
 	struct v4l2_format	format;
 	__u32			capabilities;
-	__u32			reserved[7];
+	__u32			flags;
+	__u32			reserved[6];
 };
 
 /*
-- 
cgit v1.2.3


From a70e3f024d5f4ec7edb17ab5d927eb55397f1d15 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 30 Sep 2021 14:21:04 -0700
Subject: devlink: report maximum number of snapshots with regions

Each region has an independently configurable number of maximum
snapshots. This information is not reported to userspace, making it not
very discoverable. Fix this by adding a new
DEVLINK_ATTR_REGION_MAX_SNAPSHOST attribute which is used to report this
maximum.

Ex:

  $devlink region
  pci/0000:af:00.0/nvm-flash: size 10485760 snapshot [] max 1
  pci/0000:af:00.0/device-caps: size 4096 snapshot [] max 10
  pci/0000:af:00.1/nvm-flash: size 10485760 snapshot [] max 1
  pci/0000:af:00.1/device-caps: size 4096 snapshot [] max 10

This information enables users to understand why a new region command
may fail due to having too many existing snapshots.

Reported-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 32f53a0069d6..b897b80770f6 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -551,6 +551,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_RATE_NODE_NAME,		/* string */
 	DEVLINK_ATTR_RATE_PARENT_NODE_NAME,	/* string */
 
+	DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,	/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From ef5825e3cf0d0af657f5fb4dd86d750ed42fee0a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 2 Sep 2021 11:14:47 +1000
Subject: NFSD: move filehandle format declarations out of "uapi".

A small part of the declaration concerning filehandle format are
currently in the "uapi" include directory:
   include/uapi/linux/nfsd/nfsfh.h

There is a lot more to the filehandle format, including "enum fid_type"
and "enum nfsd_fsid" which are not exported via "uapi".

This small part of the filehandle definition is of minimal use outside
of the kernel, and I can find no evidence that an other code is using
it. Certainly nfs-utils and wireshark (The most likely candidates) do not
use these declarations.

So move it out of "uapi" by copying the content from
  include/uapi/linux/nfsd/nfsfh.h
into
  fs/nfsd/nfsfh.h

A few unnecessary "#include" directives are not copied, and neither is
the #define of fh_auth, which is annotated as being for userspace only.

The copyright claims in the uapi file are identical to those in the nfsd
file, so there is no need to copy those.

The "__u32" style integer types are only needed in "uapi".  In
kernel-only code we can use the more familiar "u32" style.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/uapi/linux/nfsd/nfsfh.h | 115 ----------------------------------------
 1 file changed, 115 deletions(-)
 delete mode 100644 include/uapi/linux/nfsd/nfsfh.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
deleted file mode 100644
index e29e8accc4f4..000000000000
--- a/include/uapi/linux/nfsd/nfsfh.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * This file describes the layout of the file handles as passed
- * over the wire.
- *
- * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef _UAPI_LINUX_NFSD_FH_H
-#define _UAPI_LINUX_NFSD_FH_H
-
-#include <linux/types.h>
-#include <linux/nfs.h>
-#include <linux/nfs2.h>
-#include <linux/nfs3.h>
-#include <linux/nfs4.h>
-
-/*
- * This is the old "dentry style" Linux NFSv2 file handle.
- *
- * The xino and xdev fields are currently used to transport the
- * ino/dev of the exported inode.
- */
-struct nfs_fhbase_old {
-	__u32		fb_dcookie;	/* dentry cookie - always 0xfeebbaca */
-	__u32		fb_ino;		/* our inode number */
-	__u32		fb_dirino;	/* dir inode number, 0 for directories */
-	__u32		fb_dev;		/* our device */
-	__u32		fb_xdev;
-	__u32		fb_xino;
-	__u32		fb_generation;
-};
-
-/*
- * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
- *
- * The file handle starts with a sequence of four-byte words.
- * The first word contains a version number (1) and three descriptor bytes
- * that tell how the remaining 3 variable length fields should be handled.
- * These three bytes are auth_type, fsid_type and fileid_type.
- *
- * All four-byte values are in host-byte-order.
- *
- * The auth_type field is deprecated and must be set to 0.
- *
- * The fsid_type identifies how the filesystem (or export point) is
- *    encoded.
- *  Current values:
- *     0  - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number
- *        NOTE: we cannot use the kdev_t device id value, because kdev_t.h
- *              says we mustn't.  We must break it up and reassemble.
- *     1  - 4 byte user specified identifier
- *     2  - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
- *     3  - 4 byte device id, encoded for user-space, 4 byte inode number
- *     4  - 4 byte inode number and 4 byte uuid
- *     5  - 8 byte uuid
- *     6  - 16 byte uuid
- *     7  - 8 byte inode number and 16 byte uuid
- *
- * The fileid_type identified how the file within the filesystem is encoded.
- *   The values for this field are filesystem specific, exccept that
- *   filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
- *   in include/linux/exportfs.h for currently registered values.
- */
-struct nfs_fhbase_new {
-	union {
-		struct {
-			__u8		fb_version_aux;	/* == 1, even => nfs_fhbase_old */
-			__u8		fb_auth_type_aux;
-			__u8		fb_fsid_type_aux;
-			__u8		fb_fileid_type_aux;
-			__u32		fb_auth[1];
-			/*	__u32		fb_fsid[0]; floating */
-			/*	__u32		fb_fileid[0]; floating */
-		};
-		struct {
-			__u8		fb_version;	/* == 1, even => nfs_fhbase_old */
-			__u8		fb_auth_type;
-			__u8		fb_fsid_type;
-			__u8		fb_fileid_type;
-			__u32		fb_auth_flex[]; /* flexible-array member */
-		};
-	};
-};
-
-struct knfsd_fh {
-	unsigned int	fh_size;	/* significant for NFSv3.
-					 * Points to the current size while building
-					 * a new file handle
-					 */
-	union {
-		struct nfs_fhbase_old	fh_old;
-		__u32			fh_pad[NFS4_FHSIZE/4];
-		struct nfs_fhbase_new	fh_new;
-	} fh_base;
-};
-
-#define ofh_dcookie		fh_base.fh_old.fb_dcookie
-#define ofh_ino			fh_base.fh_old.fb_ino
-#define ofh_dirino		fh_base.fh_old.fb_dirino
-#define ofh_dev			fh_base.fh_old.fb_dev
-#define ofh_xdev		fh_base.fh_old.fb_xdev
-#define ofh_xino		fh_base.fh_old.fb_xino
-#define ofh_generation		fh_base.fh_old.fb_generation
-
-#define	fh_version		fh_base.fh_new.fb_version
-#define	fh_fsid_type		fh_base.fh_new.fb_fsid_type
-#define	fh_auth_type		fh_base.fh_new.fb_auth_type
-#define	fh_fileid_type		fh_base.fh_new.fb_fileid_type
-#define	fh_fsid			fh_base.fh_new.fb_auth_flex
-
-/* Do not use, provided for userspace compatiblity. */
-#define	fh_auth			fh_base.fh_new.fb_auth
-
-#endif /* _UAPI_LINUX_NFSD_FH_H */
-- 
cgit v1.2.3


From dea8ee31a039277576c215fffa13957970246366 Mon Sep 17 00:00:00 2001
From: Atish Patra <atish.patra@wdc.com>
Date: Mon, 27 Sep 2021 17:10:14 +0530
Subject: RISC-V: KVM: Add SBI v0.1 support

The KVM host kernel is running in HS-mode needs so we need to handle
the SBI calls coming from guest kernel running in VS-mode.

This patch adds SBI v0.1 support in KVM RISC-V. Almost all SBI v0.1
calls are implemented in KVM kernel module except GETCHAR and PUTCHART
calls which are forwarded to user space because these calls cannot be
implemented in kernel space. In future, when we implement SBI v0.2 for
Guest, we will forward SBI v0.2 experimental and vendor extension calls
to user space.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Anup Patel <anup.patel@wdc.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 include/uapi/linux/kvm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a067410ebea5..322b4b588d75 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -269,6 +269,7 @@ struct kvm_xen_exit {
 #define KVM_EXIT_AP_RESET_HOLD    32
 #define KVM_EXIT_X86_BUS_LOCK     33
 #define KVM_EXIT_XEN              34
+#define KVM_EXIT_RISCV_SBI        35
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -469,6 +470,13 @@ struct kvm_run {
 		} msr;
 		/* KVM_EXIT_XEN */
 		struct kvm_xen_exit xen;
+		/* KVM_EXIT_RISCV_SBI */
+		struct {
+			unsigned long extension_id;
+			unsigned long function_id;
+			unsigned long args[6];
+			unsigned long ret[2];
+		} riscv_sbi;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
-- 
cgit v1.2.3


From 8cb3bf8bff3c47e171f6b66f9ccfc3f1451a11a2 Mon Sep 17 00:00:00 2001
From: Justin Iurman <justin.iurman@uliege.be>
Date: Sun, 3 Oct 2021 20:45:38 +0200
Subject: ipv6: ioam: Add support for the ip6ip6 encapsulation

This patch adds support for the ip6ip6 encapsulation by providing three encap
modes: inline, encap and auto.

Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ioam6_iptunnel.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h
index bae14636a8c8..829ffdfcacca 100644
--- a/include/uapi/linux/ioam6_iptunnel.h
+++ b/include/uapi/linux/ioam6_iptunnel.h
@@ -9,9 +9,38 @@
 #ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H
 #define _UAPI_LINUX_IOAM6_IPTUNNEL_H
 
+/* Encap modes:
+ *  - inline: direct insertion
+ *  - encap: ip6ip6 encapsulation
+ *  - auto: inline for local packets, encap for in-transit packets
+ */
+enum {
+	__IOAM6_IPTUNNEL_MODE_MIN,
+
+	IOAM6_IPTUNNEL_MODE_INLINE,
+	IOAM6_IPTUNNEL_MODE_ENCAP,
+	IOAM6_IPTUNNEL_MODE_AUTO,
+
+	__IOAM6_IPTUNNEL_MODE_MAX,
+};
+
+#define IOAM6_IPTUNNEL_MODE_MIN (__IOAM6_IPTUNNEL_MODE_MIN + 1)
+#define IOAM6_IPTUNNEL_MODE_MAX (__IOAM6_IPTUNNEL_MODE_MAX - 1)
+
 enum {
 	IOAM6_IPTUNNEL_UNSPEC,
+
+	/* Encap mode */
+	IOAM6_IPTUNNEL_MODE,		/* u8 */
+
+	/* Tunnel dst address.
+	 * For encap,auto modes.
+	 */
+	IOAM6_IPTUNNEL_DST,		/* struct in6_addr */
+
+	/* IOAM Trace Header */
 	IOAM6_IPTUNNEL_TRACE,		/* struct ioam6_trace_hdr */
+
 	__IOAM6_IPTUNNEL_MAX,
 };
 
-- 
cgit v1.2.3


From 571e5c0efcb29c5dac8cf2949d3eed84ec43056c Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 19 May 2021 16:00:22 -0400
Subject: audit: add OPENAT2 record to list "how" info

Since the openat2(2) syscall uses a struct open_how pointer to communicate
its parameters they are not usefully recorded by the audit SYSCALL record's
four existing arguments.

Add a new audit record type OPENAT2 that reports the parameters in its
third argument, struct open_how with fields oflag, mode and resolve.

The new record in the context of an event would look like:
time->Wed Mar 17 16:28:53 2021
type=PROCTITLE msg=audit(1616012933.531:184): proctitle=
  73797363616C6C735F66696C652F6F70656E617432002F746D702F61756469742D
  7465737473756974652D737641440066696C652D6F70656E617432
type=PATH msg=audit(1616012933.531:184): item=1 name="file-openat2"
  inode=29 dev=00:1f mode=0100600 ouid=0 ogid=0 rdev=00:00
  obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE
  cap_fp=0 cap_fi=0 cap_fe=0 cap_fver=0 cap_frootid=0
type=PATH msg=audit(1616012933.531:184):
  item=0 name="/root/rgb/git/audit-testsuite/tests"
  inode=25 dev=00:1f mode=040700 ouid=0 ogid=0 rdev=00:00
  obj=unconfined_u:object_r:user_tmp_t:s0 nametype=PARENT
  cap_fp=0 cap_fi=0 cap_fe=0 cap_fver=0 cap_frootid=0
type=CWD msg=audit(1616012933.531:184):
  cwd="/root/rgb/git/audit-testsuite/tests"
type=OPENAT2 msg=audit(1616012933.531:184):
  oflag=0100302 mode=0600 resolve=0xa
type=SYSCALL msg=audit(1616012933.531:184): arch=c000003e syscall=437
  success=yes exit=4 a0=3 a1=7ffe315f1c53 a2=7ffe315f1550 a3=18
  items=2 ppid=528 pid=540 auid=0 uid=0 gid=0 euid=0 suid=0
  fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 ses=1 comm="openat2"
  exe="/root/rgb/git/audit-testsuite/tests/syscalls_file/openat2"
  subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
  key="testsuite-1616012933-bjAUcEPO"

Link: https://lore.kernel.org/r/d23fbb89186754487850367224b060e26f9b7181.1621363275.git.rgb@redhat.com
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
[PM: tweak subject, wrap example, move AUDIT_OPENAT2 to 1337]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index daa481729e9b..afa2472ad5d6 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -118,6 +118,7 @@
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
 #define AUDIT_BPF		1334	/* BPF subsystem */
 #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
+#define AUDIT_OPENAT2		1337	/* Record showing openat2 how args */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
-- 
cgit v1.2.3


From d154abdda6dcac92c63141035e477ac18077ffd8 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 24 Sep 2021 07:13:54 -0500
Subject: ipmi: Fix a typo

Spell "RESPONSE" correctly in a comment.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 include/uapi/linux/ipmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h
index 32d148309b16..007e65f9243b 100644
--- a/include/uapi/linux/ipmi.h
+++ b/include/uapi/linux/ipmi.h
@@ -158,7 +158,7 @@ struct kernel_ipmi_msg {
  * is used for the receive in-kernel interface and in the receive
  * IOCTL.
  *
- * The "IPMI_RESPONSE_RESPNOSE_TYPE" is a little strange sounding, but
+ * The "IPMI_RESPONSE_RESPONSE_TYPE" is a little strange sounding, but
  * it allows you to get the message results when you send a response
  * message.
  */
-- 
cgit v1.2.3


From 059747c245f0e9af5e109eece7d3414dbe08d513 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Fri, 24 Sep 2021 11:42:56 -0500
Subject: ipmi: Add support for IPMB direct messages

An application has come up that has a device sitting right on the IPMB
that would like to communicate with the BMC on the IPMB using normal
IPMI commands.

Sending these commands and handling the responses is easy enough, no
modifications are needed to the IPMI infrastructure.  But if this is an
application that also needs to receive IPMB commands and respond, some
way is needed to handle these incoming commands and send the responses.

Currently, the IPMI message handler only sends commands to the interface
and only receives responses from interface.  This change extends the
interface to receive commands/responses and send commands/responses.
These are formatted differently in support of receiving/sending IPMB
messages directly.

Signed-off-by: Corey Minyard <minyard@acm.org>
Tested-by: Andrew Manley <andrew.manley@sealingtech.com>
Reviewed-by: Andrew Manley <andrew.manley@sealingtech.com>
---
 include/uapi/linux/ipmi.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h
index 007e65f9243b..966c3070959b 100644
--- a/include/uapi/linux/ipmi.h
+++ b/include/uapi/linux/ipmi.h
@@ -80,6 +80,20 @@ struct ipmi_ipmb_addr {
 	unsigned char lun;
 };
 
+/*
+ * Used for messages received directly from an IPMB that have not gone
+ * through a MC.  This is for systems that sit right on an IPMB so
+ * they can receive commands and respond to them.
+ */
+#define IPMI_IPMB_DIRECT_ADDR_TYPE	0x81
+struct ipmi_ipmb_direct_addr {
+	int           addr_type;
+	short         channel;
+	unsigned char slave_addr;
+	unsigned char rs_lun;
+	unsigned char rq_lun;
+};
+
 /*
  * A LAN Address.  This is an address to/from a LAN interface bridged
  * by the BMC, not an address actually out on the LAN.
-- 
cgit v1.2.3


From 29a9f27574692a71c04fd41ca4bbf8eae842af13 Mon Sep 17 00:00:00 2001
From: Shuo Liu <shuo.a.liu@intel.com>
Date: Thu, 23 Sep 2021 16:41:27 +0800
Subject: virt: acrn: Introduce interfaces for MMIO device passthrough

MMIO device passthrough enables an OS in a virtual machine to directly
access a MMIO device in the host. It promises almost the native
performance, which is required in performance-critical scenarios of
ACRN.

HSM provides the following ioctls:
  - Assign - ACRN_IOCTL_ASSIGN_MMIODEV
    Pass data struct acrn_mmiodev from userspace to the hypervisor, and
    inform the hypervisor to assign a MMIO device to a User VM.

  - De-assign - ACRN_IOCTL_DEASSIGN_PCIDEV
    Pass data struct acrn_mmiodev from userspace to the hypervisor, and
    inform the hypervisor to de-assign a MMIO device from a User VM.

These new APIs will be used by user space code vm_assign_mmiodev and
vm_deassign_mmiodev in
https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/core/vmmapi.c

Signed-off-by: Shuo Liu <shuo.a.liu@intel.com>
Signed-off-by: Fei Li <fei1.li@intel.com>
Link: https://lore.kernel.org/r/20210923084128.18902-2-fei1.li@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/acrn.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h
index 353b2a2e4536..470036d6b1ac 100644
--- a/include/uapi/linux/acrn.h
+++ b/include/uapi/linux/acrn.h
@@ -396,6 +396,7 @@ struct acrn_ptdev_irq {
 /* Type of PCI device assignment */
 #define ACRN_PTDEV_QUIRK_ASSIGN	(1U << 0)
 
+#define ACRN_MMIODEV_RES_NUM	3
 #define ACRN_PCI_NUM_BARS	6
 /**
  * struct acrn_pcidev - Info for assigning or de-assigning a PCI device
@@ -417,6 +418,29 @@ struct acrn_pcidev {
 	__u32	bar[ACRN_PCI_NUM_BARS];
 };
 
+/**
+ * struct acrn_mmiodev - Info for assigning or de-assigning a MMIO device
+ * @name:			Name of the MMIO device.
+ * @res[].user_vm_pa:		Physical address of User VM of the MMIO region
+ *				for the MMIO device.
+ * @res[].service_vm_pa:	Physical address of Service VM of the MMIO
+ *				region for the MMIO device.
+ * @res[].size:			Size of the MMIO region for the MMIO device.
+ * @res[].mem_type:		Memory type of the MMIO region for the MMIO
+ *				device.
+ *
+ * This structure will be passed to hypervisor directly.
+ */
+struct acrn_mmiodev {
+	__u8	name[8];
+	struct {
+		__u64	user_vm_pa;
+		__u64	service_vm_pa;
+		__u64	size;
+		__u64	mem_type;
+	} res[ACRN_MMIODEV_RES_NUM];
+};
+
 /**
  * struct acrn_msi_entry - Info for injecting a MSI interrupt to a VM
  * @msi_addr:	MSI addr[19:12] with dest vCPU ID
@@ -568,6 +592,10 @@ struct acrn_irqfd {
 	_IOW(ACRN_IOCTL_TYPE, 0x55, struct acrn_pcidev)
 #define ACRN_IOCTL_DEASSIGN_PCIDEV	\
 	_IOW(ACRN_IOCTL_TYPE, 0x56, struct acrn_pcidev)
+#define ACRN_IOCTL_ASSIGN_MMIODEV	\
+	_IOW(ACRN_IOCTL_TYPE, 0x57, struct acrn_mmiodev)
+#define ACRN_IOCTL_DEASSIGN_MMIODEV	\
+	_IOW(ACRN_IOCTL_TYPE, 0x58, struct acrn_mmiodev)
 
 #define ACRN_IOCTL_PM_GET_CPU_STATE	\
 	_IOWR(ACRN_IOCTL_TYPE, 0x60, __u64)
-- 
cgit v1.2.3


From 424f1ac2d832f31a2814c799bd50decf6a9f8e74 Mon Sep 17 00:00:00 2001
From: Shuo Liu <shuo.a.liu@intel.com>
Date: Thu, 23 Sep 2021 16:41:28 +0800
Subject: virt: acrn: Introduce interfaces for virtual device
 creating/destroying

The ACRN hypervisor can emulate a virtual device within hypervisor for a
Guest VM. The emulated virtual device can work without the ACRN
userspace after creation. The hypervisor do the emulation of that device.

To support the virtual device creating/destroying, HSM provides the
following ioctls:
  - ACRN_IOCTL_CREATE_VDEV
    Pass data struct acrn_vdev from userspace to the hypervisor, and inform
    the hypervisor to create a virtual device for a User VM.
  - ACRN_IOCTL_DESTROY_VDEV
    Pass data struct acrn_vdev from userspace to the hypervisor, and inform
    the hypervisor to destroy a virtual device of a User VM.

These new APIs will be used by user space code vm_add_hv_vdev and
vm_remove_hv_vdev in
https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/core/vmmapi.c

Signed-off-by: Shuo Liu <shuo.a.liu@intel.com>
Signed-off-by: Fei Li <fei1.li@intel.com>
Link: https://lore.kernel.org/r/20210923084128.18902-3-fei1.li@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/acrn.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h
index 470036d6b1ac..ccf47ed92500 100644
--- a/include/uapi/linux/acrn.h
+++ b/include/uapi/linux/acrn.h
@@ -441,6 +441,44 @@ struct acrn_mmiodev {
 	} res[ACRN_MMIODEV_RES_NUM];
 };
 
+/**
+ * struct acrn_vdev - Info for creating or destroying a virtual device
+ * @id:				Union of identifier of the virtual device
+ * @id.value:			Raw data of the identifier
+ * @id.fields.vendor:		Vendor id of the virtual PCI device
+ * @id.fields.device:		Device id of the virtual PCI device
+ * @id.fields.legacy_id:	ID of the virtual device if not a PCI device
+ * @slot:			Virtual Bus/Device/Function of the virtual
+ *				device
+ * @io_base:			IO resource base address of the virtual device
+ * @io_size:			IO resource size of the virtual device
+ * @args:			Arguments for the virtual device creation
+ *
+ * The created virtual device can be a PCI device or a legacy device (e.g.
+ * a virtual UART controller) and it is emulated by the hypervisor. This
+ * structure will be passed to hypervisor directly.
+ */
+struct acrn_vdev {
+	/*
+	 * the identifier of the device, the low 32 bits represent the vendor
+	 * id and device id of PCI device and the high 32 bits represent the
+	 * device number of the legacy device
+	 */
+	union {
+		__u64 value;
+		struct {
+			__le16 vendor;
+			__le16 device;
+			__le32 legacy_id;
+		} fields;
+	} id;
+
+	__u64	slot;
+	__u32	io_addr[ACRN_PCI_NUM_BARS];
+	__u32	io_size[ACRN_PCI_NUM_BARS];
+	__u8	args[128];
+};
+
 /**
  * struct acrn_msi_entry - Info for injecting a MSI interrupt to a VM
  * @msi_addr:	MSI addr[19:12] with dest vCPU ID
@@ -596,6 +634,10 @@ struct acrn_irqfd {
 	_IOW(ACRN_IOCTL_TYPE, 0x57, struct acrn_mmiodev)
 #define ACRN_IOCTL_DEASSIGN_MMIODEV	\
 	_IOW(ACRN_IOCTL_TYPE, 0x58, struct acrn_mmiodev)
+#define ACRN_IOCTL_CREATE_VDEV	\
+	_IOW(ACRN_IOCTL_TYPE, 0x59, struct acrn_vdev)
+#define ACRN_IOCTL_DESTROY_VDEV	\
+	_IOW(ACRN_IOCTL_TYPE, 0x5A, struct acrn_vdev)
 
 #define ACRN_IOCTL_PM_GET_CPU_STATE	\
 	_IOWR(ACRN_IOCTL_TYPE, 0x60, __u64)
-- 
cgit v1.2.3


From 95a13ee858c9e426e63c97063677736f74af7163 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 1 Oct 2021 16:55:44 +0300
Subject: hyper-v: Replace uuid.h with types.h

There is no user of anything in uuid.h in the hyperv.h. Replace it with
more appropriate types.h.

Fixes: f081bbb3fd03 ("hyper-v: Remove internal types from UAPI header")
Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Link: https://lore.kernel.org/r/20211001135544.1823-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/uapi/linux/hyperv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index 6135d92e0d47..daf82a230c0e 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -26,7 +26,7 @@
 #ifndef _UAPI_HYPERV_H
 #define _UAPI_HYPERV_H
 
-#include <linux/uuid.h>
+#include <linux/types.h>
 
 /*
  * Framework version for util services.
-- 
cgit v1.2.3


From 353407d917b2d87cd8104a0453d012439c6ca4be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 6 Oct 2021 13:46:42 +0300
Subject: ethtool: Add ability to control transceiver modules' power mode

Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.

The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.

When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.

User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.

User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:

* high: Module is always in high power mode.

* auto: Module is transitioned by the host to high power mode when the
  first port using it is put administratively up and to low power mode
  when the last port using it is put administratively down.

The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.

The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).

The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.

CMIS testing
============

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x03 (ModuleReady)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : Off

The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).

The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy high
 power-mode high

Change the power mode policy to 'auto':

 # ethtool --set-module swp11 power-mode-policy auto

Query the power mode again:

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x01 (ModuleLowPwr)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : On

Put the associated port administratively up which will instruct the host
to transition the module to high power mode:

 # ip link set dev swp11 up

Query the power mode again:

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy auto
 power-mode high

Verify with the data read from the EEPROM:

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x03 (ModuleReady)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : Off

Put the associated port administratively down which will instruct the
host to transition the module to low power mode:

 # ip link set dev swp11 down

Query the power mode again:

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x01 (ModuleLowPwr)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : On

SFF-8636 testing
================

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 ...
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) enabled
 Power set                                 : Off
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.7733 mW / -1.12 dBm
 Transmit avg optical power (Channel 2)    : 0.7649 mW / -1.16 dBm
 Transmit avg optical power (Channel 3)    : 0.7790 mW / -1.08 dBm
 Transmit avg optical power (Channel 4)    : 0.7837 mW / -1.06 dBm
 Rcvr signal avg optical power(Channel 1)  : 0.9302 mW / -0.31 dBm
 Rcvr signal avg optical power(Channel 2)  : 0.9079 mW / -0.42 dBm
 Rcvr signal avg optical power(Channel 3)  : 0.8993 mW / -0.46 dBm
 Rcvr signal avg optical power(Channel 4)  : 0.8778 mW / -0.57 dBm

The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).

The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy high
 power-mode high

Change the power mode policy to 'auto':

 # ethtool --set-module swp13 power-mode-policy auto

Query the power mode again:

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) not enabled
 Power set                                 : On
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 2)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 3)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 4)    : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 1)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 2)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 3)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 4)  : 0.0000 mW / -inf dBm

Put the associated port administratively up which will instruct the host
to transition the module to high power mode:

 # ip link set dev swp13 up

Query the power mode again:

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy auto
 power-mode high

Verify with the data read from the EEPROM:

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 ...
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) enabled
 Power set                                 : Off
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.7934 mW / -1.01 dBm
 Transmit avg optical power (Channel 2)    : 0.7859 mW / -1.05 dBm
 Transmit avg optical power (Channel 3)    : 0.7885 mW / -1.03 dBm
 Transmit avg optical power (Channel 4)    : 0.7985 mW / -0.98 dBm
 Rcvr signal avg optical power(Channel 1)  : 0.9325 mW / -0.30 dBm
 Rcvr signal avg optical power(Channel 2)  : 0.9034 mW / -0.44 dBm
 Rcvr signal avg optical power(Channel 3)  : 0.9086 mW / -0.42 dBm
 Rcvr signal avg optical power(Channel 4)  : 0.8885 mW / -0.51 dBm

Put the associated port administratively down which will instruct the
host to transition the module to low power mode:

 # ip link set dev swp13 down

Query the power mode again:

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 ...
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) not enabled
 Power set                                 : On
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 2)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 3)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 4)    : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 1)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 2)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 3)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 4)  : 0.0000 mW / -inf dBm

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h         | 23 +++++++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h | 17 +++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index b6db6590baf0..6de61d53ca5d 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -706,6 +706,29 @@ enum ethtool_stringset {
 	ETH_SS_COUNT
 };
 
+/**
+ * enum ethtool_module_power_mode_policy - plug-in module power mode policy
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host
+ *	to high power mode when the first port using it is put administratively
+ *	up and to low power mode when the last port using it is put
+ *	administratively down.
+ */
+enum ethtool_module_power_mode_policy {
+	ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1,
+	ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO,
+};
+
+/**
+ * enum ethtool_module_power_mode - plug-in module power mode
+ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode.
+ */
+enum ethtool_module_power_mode {
+	ETHTOOL_MODULE_POWER_MODE_LOW = 1,
+	ETHTOOL_MODULE_POWER_MODE_HIGH,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 5545f1ca9237..ca5fbb59fa42 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -47,6 +47,8 @@ enum {
 	ETHTOOL_MSG_MODULE_EEPROM_GET,
 	ETHTOOL_MSG_STATS_GET,
 	ETHTOOL_MSG_PHC_VCLOCKS_GET,
+	ETHTOOL_MSG_MODULE_GET,
+	ETHTOOL_MSG_MODULE_SET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -90,6 +92,8 @@ enum {
 	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
 	ETHTOOL_MSG_STATS_GET_REPLY,
 	ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
+	ETHTOOL_MSG_MODULE_GET_REPLY,
+	ETHTOOL_MSG_MODULE_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -833,6 +837,19 @@ enum {
 	ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
 };
 
+/* MODULE */
+
+enum {
+	ETHTOOL_A_MODULE_UNSPEC,
+	ETHTOOL_A_MODULE_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_MODULE_POWER_MODE_POLICY,	/* u8 */
+	ETHTOOL_A_MODULE_POWER_MODE,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MODULE_CNT,
+	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
-- 
cgit v1.2.3


From 3dfb51126064b594470b9c0b278188fbc9194709 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 6 Oct 2021 13:46:46 +0300
Subject: ethtool: Add transceiver module extended state

Add an extended state and sub-state to describe link issues related to
transceiver modules.

The 'ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY' extended sub-state
tells user space that port is unable to gain a carrier because the CMIS
Module State Machine did not reach the ModuleReady (Fully Operational)
state. For example, if the module is stuck at ModuleLowPwr or
ModuleFault state. In case of the latter, user space can read the fault
reason from the module's EEPROM and potentially reset it.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 6de61d53ca5d..a2223b685451 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -603,6 +603,7 @@ enum ethtool_link_ext_state {
 	ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
 	ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
 	ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+	ETHTOOL_LINK_EXT_STATE_MODULE,
 };
 
 /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */
@@ -649,6 +650,11 @@ enum ethtool_link_ext_substate_cable_issue {
 	ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
 };
 
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */
+enum ethtool_link_ext_substate_module {
+	ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1,
+};
+
 #define ETH_GSTRING_LEN		32
 
 /**
-- 
cgit v1.2.3


From bf69bad38cf63d980e8a603f8d1bd1f85b5ed3d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Thu, 23 Sep 2021 14:11:05 -0300
Subject: futex: Implement sys_futex_waitv()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support to wait on multiple futexes. This is the interface
implemented by this syscall:

futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
	    unsigned int flags, struct timespec *timeout, clockid_t clockid)

struct futex_waitv {
	__u64 val;
	__u64 uaddr;
	__u32 flags;
	__u32 __reserved;
};

Given an array of struct futex_waitv, wait on each uaddr. The thread
wakes if a futex_wake() is performed at any uaddr. The syscall returns
immediately if any waiter has *uaddr != val. *timeout is an optional
absolute timeout value for the operation. This syscall supports only
64bit sized timeout structs. The flags argument of the syscall should be
empty, but it can be used for future extensions. Flags for shared
futexes, sizes, etc. should be used on the individual flags of each
waiter.

__reserved is used for explicit padding and should be 0, but it might be
used for future extensions. If the userspace uses 32-bit pointers, it
should make sure to explicitly cast it when assigning to waitv::uaddr.

Returns the array index of one of the woken futexes. There’s no given
information of how many were woken, or any particular attribute of it
(if it’s the first woken, if it is of the smaller index...).

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
---
 include/uapi/linux/futex.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 235e5b2facaa..71a5df8d2689 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -43,6 +43,31 @@
 #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
 
+/*
+ * Flags to specify the bit length of the futex word for futex2 syscalls.
+ * Currently, only 32 is supported.
+ */
+#define FUTEX_32		2
+
+/*
+ * Max numbers of elements in a futex_waitv array
+ */
+#define FUTEX_WAITV_MAX		128
+
+/**
+ * struct futex_waitv - A waiter for vectorized wait
+ * @val:	Expected value at uaddr
+ * @uaddr:	User address to wait on
+ * @flags:	Flags for this waiter
+ * @__reserved:	Reserved member to preserve data alignment. Should be 0.
+ */
+struct futex_waitv {
+	__u64 val;
+	__u64 uaddr;
+	__u32 flags;
+	__u32 __reserved;
+};
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
-- 
cgit v1.2.3


From 460275f124fb072dca218a6b43b6370eebbab20d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Tue, 5 Oct 2021 20:09:40 +0200
Subject: PCI: Add PCI_EXP_DEVCTL_PAYLOAD_* macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define a macro PCI_EXP_DEVCTL_PAYLOAD_* for every possible Max Payload
Size in linux/pci_regs.h, in the same style as PCI_EXP_DEVCTL_READRQ_*.

Link: https://lore.kernel.org/r/20211005180952.6812-2-kabel@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index e709ae8235e7..ff6ccbc6efe9 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -504,6 +504,12 @@
 #define  PCI_EXP_DEVCTL_URRE	0x0008	/* Unsupported Request Reporting En. */
 #define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
 #define  PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* Max_Payload_Size */
+#define  PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */
 #define  PCI_EXP_DEVCTL_EXT_TAG	0x0100	/* Extended Tag Field Enable */
 #define  PCI_EXP_DEVCTL_PHANTOM	0x0200	/* Phantom Functions Enable */
 #define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
-- 
cgit v1.2.3


From 4c1e34c0dbffb17accdfe16ac97ab432df9024ff Mon Sep 17 00:00:00 2001
From: Richard Palethorpe <rpalethorpe@suse.com>
Date: Fri, 8 Oct 2021 11:00:53 +0100
Subject: vsock: Enable y2038 safe timeval for timeout

Reuse the timeval compat code from core/sock to handle 32-bit and
64-bit timeval structures. Also introduce a new socket option define
to allow using y2038 safe timeval under 32-bit.

The existing behavior of sock_set_timeout and vsock's timeout setter
differ when the time value is out of bounds. vsocks current behavior
is retained at the expense of not being able to share the full
implementation.

This allows the LTP test vsock01 to pass under 32-bit compat mode.

Fixes: fe0c72f3db11 ("socket: move compat timeout handling into sock.c")
Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
Cc: Richard Palethorpe <rpalethorpe@richiejp.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/vm_sockets.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index 46918a1852d7..c60ca33eac59 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -64,7 +64,7 @@
  * timeout for a STREAM socket.
  */
 
-#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD 6
 
 /* Option name for using non-blocking send/receive.  Use as the option name
  * for setsockopt(3) or getsockopt(3) to set or get the non-blocking
@@ -81,6 +81,17 @@
 
 #define SO_VM_SOCKETS_NONBLOCK_TXRX 7
 
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8
+
+#if !defined(__KERNEL__)
+#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD
+#else
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT \
+	(sizeof(time_t) == sizeof(__kernel_long_t) ? SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD : SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW)
+#endif
+#endif
+
 /* The vSocket equivalent of INADDR_ANY.  This works for the svm_cid field of
  * sockaddr_vm and indicates the context ID of the current endpoint.
  */
-- 
cgit v1.2.3


From 2c611ad97a82b51221bb0920cc6cac0b1d4c0e52 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@nvidia.com>
Date: Mon, 11 Oct 2021 14:12:37 +0200
Subject: net, neigh: Extend neigh->flags to 32 bit to allow for extensions

Currently, all bits in struct ndmsg's ndm_flags are used up with the most
recent addition of 435f2e7cc0b7 ("net: bridge: add support for sticky fdb
entries"). This makes it impossible to extend the neighboring subsystem
with new NTF_* flags:

  struct ndmsg {
    __u8   ndm_family;
    __u8   ndm_pad1;
    __u16  ndm_pad2;
    __s32  ndm_ifindex;
    __u16  ndm_state;
    __u8   ndm_flags;
    __u8   ndm_type;
  };

There are ndm_pad{1,2} attributes which are not used. However, due to
uncareful design, the kernel does not enforce them to be zero upon new
neighbor entry addition, and given they've been around forever, it is
not possible to reuse them today due to risk of breakage. One option to
overcome this limitation is to add a new NDA_FLAGS_EXT attribute for
extended flags.

In struct neighbour, there is a 3 byte hole between protocol and ha_lock,
which allows neigh->flags to be extended from 8 to 32 bits while still
being on the same cacheline as before. This also allows for all future
NTF_* flags being in neigh->flags rather than yet another flags field.
Unknown flags in NDA_FLAGS_EXT will be rejected by the kernel.

Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Roopa Prabhu <roopa@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 00a60695fa53..a80cca141855 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -31,6 +31,7 @@ enum {
 	NDA_PROTOCOL,  /* Originator of entry */
 	NDA_NH_ID,
 	NDA_FDB_EXT_ATTRS,
+	NDA_FLAGS_EXT,
 	__NDA_MAX
 };
 
-- 
cgit v1.2.3


From 7482e3841d520a368426ac196720601687e2dc47 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 11 Oct 2021 14:12:38 +0200
Subject: net, neigh: Add NTF_MANAGED flag for managed neighbor entries

Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
flag. The flag then indicates to the kernel that the neighbor entry should be
periodically probed for keeping the entry in NUD_REACHABLE state iff possible.

The use case for this is targeting XDP or tc BPF load-balancers which use
the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
for their backends. Given they cannot be resolved in fast-path, a control
plane inserts the L3 (without L2) entries manually into the neighbor table
and lets the kernel do the neighbor resolution either on the gateway or on
the backend directly in case the latter resides in the same L2. This avoids
to deal with L2 in the control plane and to rebuild what the kernel already
does best anyway.

NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor
table which gets triggered by the system work queue to periodically call
neigh_event_send() for performing the resolution. The implementation allows
migration from/to NTF_MANAGED neighbor entries, so that already existing
entries can be converted by the control plane if needed. Potentially, we could
make the interval for periodically calling neigh_event_send() configurable;
right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router:
Periodically update the kernel's neigh table"). In future, the latter could
possibly reuse the NTF_MANAGED neighbors as well.

Example:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://linuxplumbersconf.org/event/11/contributions/953/
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/neighbour.h | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index a80cca141855..db05fb55055e 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -41,14 +41,16 @@ enum {
  *	Neighbor Cache Entry Flags
  */
 
-#define NTF_USE		0x01
-#define NTF_SELF	0x02
-#define NTF_MASTER	0x04
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_EXT_LEARNED	0x10
-#define NTF_OFFLOADED   0x20
-#define NTF_STICKY	0x40
-#define NTF_ROUTER	0x80
+#define NTF_USE		(1 << 0)
+#define NTF_SELF	(1 << 1)
+#define NTF_MASTER	(1 << 2)
+#define NTF_PROXY	(1 << 3)	/* == ATF_PUBL */
+#define NTF_EXT_LEARNED	(1 << 4)
+#define NTF_OFFLOADED   (1 << 5)
+#define NTF_STICKY	(1 << 6)
+#define NTF_ROUTER	(1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED	(1 << 0)
 
 /*
  *	Neighbor Cache Entry States.
@@ -66,12 +68,22 @@ enum {
 #define NUD_PERMANENT	0x80
 #define NUD_NONE	0x00
 
-/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
- * and make no address resolution or NUD.
- * NUD_PERMANENT also cannot be deleted by garbage collectors.
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
  * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
  * states don't make sense and thus are ignored. Such entries don't age and
  * can roam.
+ *
+ * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
  */
 
 struct nda_cacheinfo {
-- 
cgit v1.2.3


From 42df6e1d221dddc0f2acf2be37e68d553ad65f96 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 8 Oct 2021 22:06:03 +0200
Subject: netfilter: Introduce egress hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Support classifying packets with netfilter on egress to satisfy user
requirements such as:
* outbound security policies for containers (Laura)
* filtering and mangling intra-node Direct Server Return (DSR) traffic
  on a load balancer (Laura)
* filtering locally generated traffic coming in through AF_PACKET,
  such as local ARP traffic generated for clustering purposes or DHCP
  (Laura; the AF_PACKET plumbing is contained in a follow-up commit)
* L2 filtering from ingress and egress for AVB (Audio Video Bridging)
  and gPTP with nftables (Pablo)
* in the future: in-kernel NAT64/NAT46 (Pablo)

The egress hook introduced herein complements the ingress hook added by
commit e687ad60af09 ("netfilter: add netfilter ingress hook after
handle_ing() under unique static key").  A patch for nftables to hook up
egress rules from user space has been submitted separately, so users may
immediately take advantage of the feature.

Alternatively or in addition to netfilter, packets can be classified
with traffic control (tc).  On ingress, packets are classified first by
tc, then by netfilter.  On egress, the order is reversed for symmetry.
Conceptually, tc and netfilter can be thought of as layers, with
netfilter layered above tc.

Traffic control is capable of redirecting packets to another interface
(man 8 tc-mirred).  E.g., an ingress packet may be redirected from the
host namespace to a container via a veth connection:
tc ingress (host) -> tc egress (veth host) -> tc ingress (veth container)

In this case, netfilter egress classifying is not performed when leaving
the host namespace!  That's because the packet is still on the tc layer.
If tc redirects the packet to a physical interface in the host namespace
such that it leaves the system, the packet is never subjected to
netfilter egress classifying.  That is only logical since it hasn't
passed through netfilter ingress classifying either.

Packets can alternatively be redirected at the netfilter layer using
nft fwd.  Such a packet *is* subjected to netfilter egress classifying
since it has reached the netfilter layer.

Internally, the skb->nf_skip_egress flag controls whether netfilter is
invoked on egress by __dev_queue_xmit().  Because __dev_queue_xmit() may
be called recursively by tunnel drivers such as vxlan, the flag is
reverted to false after sch_handle_egress().  This ensures that
netfilter is applied both on the overlay and underlying network.

Interaction between tc and netfilter is possible by setting and querying
skb->mark.

If netfilter egress classifying is not enabled on any interface, it is
patched out of the data path by way of a static_key and doesn't make a
performance difference that is discernible from noise:

Before:             1537 1538 1538 1537 1538 1537 Mb/sec
After:              1536 1534 1539 1539 1539 1540 Mb/sec
Before + tc accept: 1418 1418 1418 1419 1419 1418 Mb/sec
After  + tc accept: 1419 1424 1418 1419 1422 1420 Mb/sec
Before + tc drop:   1620 1619 1619 1619 1620 1620 Mb/sec
After  + tc drop:   1616 1624 1625 1624 1622 1619 Mb/sec

When netfilter egress classifying is enabled on at least one interface,
a minimal performance penalty is incurred for every egress packet, even
if the interface it's transmitted over doesn't have any netfilter egress
rules configured.  That is caused by checking dev->nf_hooks_egress
against NULL.

Measurements were performed on a Core i7-3615QM.  Commands to reproduce:
ip link add dev foo type dummy
ip link set dev foo up
modprobe pktgen
echo "add_device foo" > /proc/net/pktgen/kpktgend_3
samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -i foo -n 400000000 -m "11:11:11:11:11:11" -d 1.1.1.1

Accept all traffic with tc:
tc qdisc add dev foo clsact
tc filter add dev foo egress bpf da bytecode '1,6 0 0 0,'

Drop all traffic with tc:
tc qdisc add dev foo clsact
tc filter add dev foo egress bpf da bytecode '1,6 0 0 2,'

Apply this patch when measuring packet drops to avoid errors in dmesg:
https://lore.kernel.org/netdev/a73dda33-57f4-95d8-ea51-ed483abd6a7a@iogearbox.net/

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: Laura García Liébana <nevola@gmail.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index ef9a44286e23..53411ccc69db 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -51,6 +51,7 @@ enum nf_inet_hooks {
 
 enum nf_dev_hooks {
 	NF_NETDEV_INGRESS,
+	NF_NETDEV_EGRESS,
 	NF_NETDEV_NUMHOOKS
 };
 
-- 
cgit v1.2.3


From 8b8ff8cc3b8155c18162e8b1f70e1230db176862 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 7 Sep 2021 19:39:01 +0300
Subject: perf/x86: Add new event for AUX output counter index

PEBS-via-PT records contain a mask of applicable counters. To identify
which event belongs to which counter, a side-band event is needed. Until
now, there has been no side-band event, and consequently users were limited
to using a single event.

Add such a side-band event. Note the event is optimised to output only
when the counter index changes for an event. That works only so long as
all PEBS-via-PT events are scheduled together, which they are for a
recording session because they are in a single group.

Also no attribute bit is used to select the new event, so a new
kernel is not compatible with older perf tools.  The assumption
being that PEBS-via-PT is sufficiently esoteric that users will not
be troubled by this.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210907163903.11820-2-adrian.hunter@intel.com
---
 include/uapi/linux/perf_event.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a15645..c89535de1ec8 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1141,6 +1141,21 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_TEXT_POKE			= 20,
 
+	/*
+	 * Data written to the AUX area by hardware due to aux_output, may need
+	 * to be matched to the event by an architecture-specific hardware ID.
+	 * This records the hardware ID, but requires sample_id to provide the
+	 * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
+	 * records from multiple events.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				hw_id;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_AUX_OUTPUT_HW_ID		= 21,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
-- 
cgit v1.2.3


From e72aeb9ee0e34c57dc90793d0bf82cab9624d64e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 14 Oct 2021 10:59:18 -0700
Subject: fq_codel: implement L4S style ce_threshold_ect1 marking

Add TCA_FQ_CODEL_CE_THRESHOLD_ECT1 boolean option to select Low Latency,
Low Loss, Scalable Throughput (L4S) style marking, along with ce_threshold.

If enabled, only packets with ECT(1) can be transformed to CE
if their sojourn time is above the ce_threshold.

Note that this new option does not change rules for codel law.
In particular, if TCA_FQ_CODEL_ECN is left enabled (this is
the default when fq_codel qdisc is created), ECT(0) packets can
still get CE if codel law (as governed by limit/target) decides so.

Section 4.3.b of current draft [1] states:

b.  A scheduler with per-flow queues such as FQ-CoDel or FQ-PIE can
    be used for L4S.  For instance within each queue of an FQ-CoDel
    system, as well as a CoDel AQM, there is typically also ECN
    marking at an immediate (unsmoothed) shallow threshold to support
    use in data centres (see Sec.5.2.7 of [RFC8290]).  This can be
    modified so that the shallow threshold is solely applied to
    ECT(1) packets.  Then if there is a flow of non-ECN or ECT(0)
    packets in the per-flow-queue, the Classic AQM (e.g.  CoDel) is
    applied; while if there is a flow of ECT(1) packets in the queue,
    the shallower (typically sub-millisecond) threshold is applied.

Tested:

tc qd replace dev eth1 root fq_codel ce_threshold_ect1 50usec

netperf ... -t TCP_STREAM -- K dctcp

tc -s -d qd sh dev eth1
qdisc fq_codel 8022: root refcnt 32 limit 10240p flows 1024 quantum 9212 target 5ms ce_threshold_ect1 49us interval 100ms memory_limit 32Mb ecn drop_batch 64
 Sent 14388596616 bytes 9543449 pkt (dropped 0, overlimits 0 requeues 152013)
 backlog 0b 0p requeues 152013
  maxpacket 68130 drop_overlimit 0 new_flow_count 95678 ecn_mark 0 ce_mark 7639
  new_flows_len 0 old_flows_len 0

[1] L4S current draft:
https://datatracker.ietf.org/doc/html/draft-ietf-tsvwg-l4s-arch

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Ingemar Johansson S <ingemar.s.johansson@ericsson.com>
Cc: Tom Henderson <tomh@tomh.org>
Cc: Bob Briscoe <in@bobbriscoe.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index ec88590b3198..6be9a84cccfa 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -840,6 +840,7 @@ enum {
 	TCA_FQ_CODEL_CE_THRESHOLD,
 	TCA_FQ_CODEL_DROP_BATCH_SIZE,
 	TCA_FQ_CODEL_MEMORY_LIMIT,
+	TCA_FQ_CODEL_CE_THRESHOLD_ECT1,
 	__TCA_FQ_CODEL_MAX
 };
 
-- 
cgit v1.2.3


From b0539f5eddc2eefd24378bda3ee9cbbca916f58d Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sat, 16 Oct 2021 11:37:51 +0200
Subject: net/smc: add netlink support for SMC-Rv2

Implement the netlink support for SMC-Rv2 related attributes that are
provided to user space.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index b175bd0165a1..20f33b27787f 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -84,17 +84,28 @@ enum {
 	SMC_NLA_SYS_IS_ISM_V2,		/* u8 */
 	SMC_NLA_SYS_LOCAL_HOST,		/* string */
 	SMC_NLA_SYS_SEID,		/* string */
+	SMC_NLA_SYS_IS_SMCR_V2,		/* u8 */
 	__SMC_NLA_SYS_MAX,
 	SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
 };
 
-/* SMC_NLA_LGR_V2 nested attributes */
+/* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */
 enum {
 	SMC_NLA_LGR_V2_VER,		/* u8 */
 	SMC_NLA_LGR_V2_REL,		/* u8 */
 	SMC_NLA_LGR_V2_OS,		/* u8 */
 	SMC_NLA_LGR_V2_NEG_EID,		/* string */
 	SMC_NLA_LGR_V2_PEER_HOST,	/* string */
+	__SMC_NLA_LGR_V2_MAX,
+	SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1
+};
+
+/* SMC_NLA_LGR_R_V2 nested attributes */
+enum {
+	SMC_NLA_LGR_R_V2_UNSPEC,
+	SMC_NLA_LGR_R_V2_DIRECT,	/* u8 */
+	__SMC_NLA_LGR_R_V2_MAX,
+	SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1
 };
 
 /* SMC_GEN_LGR_SMCR attributes */
@@ -106,6 +117,8 @@ enum {
 	SMC_NLA_LGR_R_PNETID,		/* string */
 	SMC_NLA_LGR_R_VLAN_ID,		/* u8 */
 	SMC_NLA_LGR_R_CONNS_NUM,	/* u32 */
+	SMC_NLA_LGR_R_V2_COMMON,	/* nest */
+	SMC_NLA_LGR_R_V2,		/* nest */
 	__SMC_NLA_LGR_R_MAX,
 	SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
 };
@@ -138,7 +151,7 @@ enum {
 	SMC_NLA_LGR_D_PNETID,		/* string */
 	SMC_NLA_LGR_D_CHID,		/* u16 */
 	SMC_NLA_LGR_D_PAD,		/* flag */
-	SMC_NLA_LGR_V2,			/* nest */
+	SMC_NLA_LGR_D_V2_COMMON,	/* nest */
 	__SMC_NLA_LGR_D_MAX,
 	SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
 };
-- 
cgit v1.2.3


From e65c26f413718ed2e6d788491adcd8cebc0f44b6 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 29 Sep 2021 12:15:58 +0900
Subject: counter: Move counter enums to uapi header

This is in preparation for a subsequent patch implementing a character
device interface for the Counter subsystem.

Reviewed-by: David Lechner <david@lechnology.com>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/962a5f2027fafcf4f77c10e1baf520463960d1ee.1632884256.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/uapi/linux/counter.h | 56 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 include/uapi/linux/counter.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h
new file mode 100644
index 000000000000..6113938a6044
--- /dev/null
+++ b/include/uapi/linux/counter.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Userspace ABI for Counter character devices
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#ifndef _UAPI_COUNTER_H_
+#define _UAPI_COUNTER_H_
+
+/* Component scope definitions */
+enum counter_scope {
+	COUNTER_SCOPE_DEVICE,
+	COUNTER_SCOPE_SIGNAL,
+	COUNTER_SCOPE_COUNT,
+};
+
+/* Count direction values */
+enum counter_count_direction {
+	COUNTER_COUNT_DIRECTION_FORWARD,
+	COUNTER_COUNT_DIRECTION_BACKWARD,
+};
+
+/* Count mode values */
+enum counter_count_mode {
+	COUNTER_COUNT_MODE_NORMAL,
+	COUNTER_COUNT_MODE_RANGE_LIMIT,
+	COUNTER_COUNT_MODE_NON_RECYCLE,
+	COUNTER_COUNT_MODE_MODULO_N,
+};
+
+/* Count function values */
+enum counter_function {
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_DECREASE,
+	COUNTER_FUNCTION_PULSE_DIRECTION,
+	COUNTER_FUNCTION_QUADRATURE_X1_A,
+	COUNTER_FUNCTION_QUADRATURE_X1_B,
+	COUNTER_FUNCTION_QUADRATURE_X2_A,
+	COUNTER_FUNCTION_QUADRATURE_X2_B,
+	COUNTER_FUNCTION_QUADRATURE_X4,
+};
+
+/* Signal values */
+enum counter_signal_level {
+	COUNTER_SIGNAL_LEVEL_LOW,
+	COUNTER_SIGNAL_LEVEL_HIGH,
+};
+
+/* Action mode values */
+enum counter_synapse_action {
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+};
+
+#endif /* _UAPI_COUNTER_H_ */
-- 
cgit v1.2.3


From b6c50affda5957a3629b149a91c7f6688ffce7f7 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 29 Sep 2021 12:15:59 +0900
Subject: counter: Add character device interface

This patch introduces a character device interface for the Counter
subsystem. Device data is exposed through standard character device read
operations. Device data is gathered when a Counter event is pushed by
the respective Counter device driver. Configuration is handled via ioctl
operations on the respective Counter character device node.

Cc: David Lechner <david@lechnology.com>
Cc: Gwendal Grignou <gwendal@chromium.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/b8b8c64b4065aedff43699ad1f0e2f8d1419c15b.1632884256.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/uapi/linux/counter.h | 98 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h
index 6113938a6044..d0aa95aeff7b 100644
--- a/include/uapi/linux/counter.h
+++ b/include/uapi/linux/counter.h
@@ -6,6 +6,19 @@
 #ifndef _UAPI_COUNTER_H_
 #define _UAPI_COUNTER_H_
 
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/* Component type definitions */
+enum counter_component_type {
+	COUNTER_COMPONENT_NONE,
+	COUNTER_COMPONENT_SIGNAL,
+	COUNTER_COMPONENT_COUNT,
+	COUNTER_COMPONENT_FUNCTION,
+	COUNTER_COMPONENT_SYNAPSE_ACTION,
+	COUNTER_COMPONENT_EXTENSION,
+};
+
 /* Component scope definitions */
 enum counter_scope {
 	COUNTER_SCOPE_DEVICE,
@@ -13,6 +26,91 @@ enum counter_scope {
 	COUNTER_SCOPE_COUNT,
 };
 
+/**
+ * struct counter_component - Counter component identification
+ * @type: component type (one of enum counter_component_type)
+ * @scope: component scope (one of enum counter_scope)
+ * @parent: parent ID (matching the ID suffix of the respective parent sysfs
+ *          path as described by the ABI documentation file
+ *          Documentation/ABI/testing/sysfs-bus-counter)
+ * @id: component ID (matching the ID provided by the respective *_component_id
+ *      sysfs attribute of the desired component)
+ *
+ * For example, if the Count 2 ceiling extension of Counter device 4 is desired,
+ * set type equal to COUNTER_COMPONENT_EXTENSION, scope equal to
+ * COUNTER_COUNT_SCOPE, parent equal to 2, and id equal to the value provided by
+ * the respective /sys/bus/counter/devices/counter4/count2/ceiling_component_id
+ * sysfs attribute.
+ */
+struct counter_component {
+	__u8 type;
+	__u8 scope;
+	__u8 parent;
+	__u8 id;
+};
+
+/* Event type definitions */
+enum counter_event_type {
+	/* Count value increased past ceiling */
+	COUNTER_EVENT_OVERFLOW,
+	/* Count value decreased past floor */
+	COUNTER_EVENT_UNDERFLOW,
+	/* Count value increased past ceiling, or decreased past floor */
+	COUNTER_EVENT_OVERFLOW_UNDERFLOW,
+	/* Count value reached threshold */
+	COUNTER_EVENT_THRESHOLD,
+	/* Index signal detected */
+	COUNTER_EVENT_INDEX,
+};
+
+/**
+ * struct counter_watch - Counter component watch configuration
+ * @component: component to watch when event triggers
+ * @event: event that triggers (one of enum counter_event_type)
+ * @channel: event channel (typically 0 unless the device supports concurrent
+ *	     events of the same type)
+ */
+struct counter_watch {
+	struct counter_component component;
+	__u8 event;
+	__u8 channel;
+};
+
+/*
+ * Queues a Counter watch for the specified event.
+ *
+ * The queued watches will not be applied until COUNTER_ENABLE_EVENTS_IOCTL is
+ * called.
+ */
+#define COUNTER_ADD_WATCH_IOCTL _IOW(0x3E, 0x00, struct counter_watch)
+/*
+ * Enables monitoring the events specified by the Counter watches that were
+ * queued by COUNTER_ADD_WATCH_IOCTL.
+ *
+ * If events are already enabled, the new set of watches replaces the old one.
+ * Calling this ioctl also has the effect of clearing the queue of watches added
+ * by COUNTER_ADD_WATCH_IOCTL.
+ */
+#define COUNTER_ENABLE_EVENTS_IOCTL _IO(0x3E, 0x01)
+/*
+ * Stops monitoring the previously enabled events.
+ */
+#define COUNTER_DISABLE_EVENTS_IOCTL _IO(0x3E, 0x02)
+
+/**
+ * struct counter_event - Counter event data
+ * @timestamp: best estimate of time of event occurrence, in nanoseconds
+ * @value: component value
+ * @watch: component watch configuration
+ * @status: return status (system error number)
+ */
+struct counter_event {
+	__aligned_u64 timestamp;
+	__aligned_u64 value;
+	struct counter_watch watch;
+	__u8 status;
+};
+
 /* Count direction values */
 enum counter_count_direction {
 	COUNTER_COUNT_DIRECTION_FORWARD,
-- 
cgit v1.2.3


From b416beb25d9317499c823aea1522eefd8d72ea36 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Mon, 18 Oct 2021 11:29:34 +0800
Subject: mctp: unify sockaddr_mctp types

Use the more precise __kernel_sa_family_t for smctp_family, to match
struct sockaddr.

Also, use an unsigned int for the network member; negative networks
don't make much sense. We're already using unsigned for mctp_dev and
mctp_skb_cb, but need to change mctp_sock to suit.

Fixes: 60fc63981693 ("mctp: Add sockaddr_mctp to uapi")
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Acked-by: Eugene Syromiatnikov <esyr@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mctp.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 52b54d13f385..f384962d8ff2 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -10,6 +10,7 @@
 #define __UAPI_MCTP_H
 
 #include <linux/types.h>
+#include <linux/socket.h>
 
 typedef __u8			mctp_eid_t;
 
@@ -18,8 +19,8 @@ struct mctp_addr {
 };
 
 struct sockaddr_mctp {
-	unsigned short int	smctp_family;
-	int			smctp_network;
+	__kernel_sa_family_t	smctp_family;
+	unsigned int		smctp_network;
 	struct mctp_addr	smctp_addr;
 	__u8			smctp_type;
 	__u8			smctp_tag;
-- 
cgit v1.2.3


From 5a20dd46b8b8459382685969cf0f196ae9fb9766 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Mon, 18 Oct 2021 11:29:35 +0800
Subject: mctp: Be explicit about struct sockaddr_mctp padding

We currently have some implicit padding in struct sockaddr_mctp. This
patch makes this padding explicit, and ensures we have consistent
layout on platforms with <32bit alignmnent.

Fixes: 60fc63981693 ("mctp: Add sockaddr_mctp to uapi")
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mctp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index f384962d8ff2..6acd4ccafbf7 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -20,10 +20,12 @@ struct mctp_addr {
 
 struct sockaddr_mctp {
 	__kernel_sa_family_t	smctp_family;
+	__u16			__smctp_pad0;
 	unsigned int		smctp_network;
 	struct mctp_addr	smctp_addr;
 	__u8			smctp_type;
 	__u8			smctp_tag;
+	__u8			__smctp_pad1;
 };
 
 #define MCTP_NET_ANY		0x0
-- 
cgit v1.2.3


From 7bbbbfaa7a1b0b03890f25fba5f28bb8c7ef145a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= <alsi@bang-olufsen.dk>
Date: Mon, 18 Oct 2021 11:37:56 +0200
Subject: ether: add EtherType for proprietary Realtek protocols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new EtherType ETH_P_REALTEK to the if_ether.h uapi header. The
EtherType 0x8899 is used in a number of different protocols from Realtek
Semiconductor Corp [1], so no general assumptions should be made when
trying to decode such packets. Observed protocols include:

  0x1 - Realtek Remote Control protocol [2]
  0x2 - Echo protocol [2]
  0x3 - Loop detection protocol [2]
  0x4 - RTL8365MB 4- and 8-byte switch CPU tag protocols [3]
  0x9 - RTL8306 switch CPU tag protocol [4]
  0xA - RTL8366RB switch CPU tag protocol [4]

[1] https://lore.kernel.org/netdev/CACRpkdYQthFgjwVzHyK3DeYUOdcYyWmdjDPG=Rf9B3VrJ12Rzg@mail.gmail.com/
[2] https://www.wireshark.org/lists/ethereal-dev/200409/msg00090.html
[3] https://lore.kernel.org/netdev/20210822193145.1312668-4-alvin@pqrs.dk/
[4] https://lore.kernel.org/netdev/20200708122537.1341307-2-linus.walleij@linaro.org/

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5f589c7a8382..5da4ee234e0b 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -86,6 +86,7 @@
 					 * over Ethernet
 					 */
 #define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
+#define ETH_P_REALTEK	0x8899          /* Multiple proprietary protocols */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
 #define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
-- 
cgit v1.2.3


From 917425f71f36ce6f61841497040e10d0166106d8 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 18 Oct 2021 17:19:27 +0200
Subject: rtc: add alarm related features

Add more alarm related features to be declared by drivers.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211018151933.76865-2-alexandre.belloni@bootlin.com
---
 include/uapi/linux/rtc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index f950bff75e97..f4037c541925 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -114,7 +114,9 @@ struct rtc_pll_info {
 #define RTC_FEATURE_ALARM		0
 #define RTC_FEATURE_ALARM_RES_MINUTE	1
 #define RTC_FEATURE_NEED_WEEK_DAY	2
-#define RTC_FEATURE_CNT			3
+#define RTC_FEATURE_ALARM_RES_2S	3
+#define RTC_FEATURE_UPDATE_INTERRUPT	4
+#define RTC_FEATURE_CNT			5
 
 #define RTC_MAX_FREQ	8192
 
-- 
cgit v1.2.3


From 6a8af1b6568ad9ee08a419fb12c793f7992cf8a4 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 18 Oct 2021 17:19:28 +0200
Subject: rtc: add parameter ioctl

Add an ioctl allowing to get and set extra parameters for an RTC. For now,
only handle getting available features.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211018151933.76865-3-alexandre.belloni@bootlin.com
---
 include/uapi/linux/rtc.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index f4037c541925..3241f9ecc639 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -14,6 +14,7 @@
 
 #include <linux/const.h>
 #include <linux/ioctl.h>
+#include <linux/types.h>
 
 /*
  * The struct used to pass data via the following ioctl. Similar to the
@@ -66,6 +67,17 @@ struct rtc_pll_info {
 	long pll_clock;     /* base PLL frequency */
 };
 
+struct rtc_param {
+	__u64 param;
+	union {
+		__u64 uvalue;
+		__s64 svalue;
+		__u64 ptr;
+	};
+	__u32 index;
+	__u32 __pad;
+};
+
 /*
  * ioctl calls that are permitted to the /dev/rtc interface, if
  * any of the RTC drivers are enabled.
@@ -95,6 +107,9 @@ struct rtc_pll_info {
 #define RTC_PLL_GET	_IOR('p', 0x11, struct rtc_pll_info)  /* Get PLL correction */
 #define RTC_PLL_SET	_IOW('p', 0x12, struct rtc_pll_info)  /* Set PLL correction */
 
+#define RTC_PARAM_GET	_IOW('p', 0x13, struct rtc_param)  /* Get parameter */
+#define RTC_PARAM_SET	_IOW('p', 0x14, struct rtc_param)  /* Set parameter */
+
 #define RTC_VL_DATA_INVALID	_BITUL(0) /* Voltage too low, RTC data is invalid */
 #define RTC_VL_BACKUP_LOW	_BITUL(1) /* Backup voltage is low */
 #define RTC_VL_BACKUP_EMPTY	_BITUL(2) /* Backup empty or not present */
@@ -118,6 +133,9 @@ struct rtc_pll_info {
 #define RTC_FEATURE_UPDATE_INTERRUPT	4
 #define RTC_FEATURE_CNT			5
 
+/* parameter list */
+#define RTC_PARAM_FEATURES		0
+
 #define RTC_MAX_FREQ	8192
 
 
-- 
cgit v1.2.3


From 2268551935dbf1abcbb4d4fb7b1ad74dbe0d1be0 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 18 Oct 2021 17:19:29 +0200
Subject: rtc: expose correction feature

Add a new feature for RTCs able to correct the oscillator imprecision. This
is also called offset or trimming. Such drivers have a .set_offset callback,
use that to set the feature bit from the core.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211018151933.76865-4-alexandre.belloni@bootlin.com
---
 include/uapi/linux/rtc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index 3241f9ecc639..c83bb9a4fa4f 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -131,7 +131,8 @@ struct rtc_param {
 #define RTC_FEATURE_NEED_WEEK_DAY	2
 #define RTC_FEATURE_ALARM_RES_2S	3
 #define RTC_FEATURE_UPDATE_INTERRUPT	4
-#define RTC_FEATURE_CNT			5
+#define RTC_FEATURE_CORRECTION		5
+#define RTC_FEATURE_CNT			6
 
 /* parameter list */
 #define RTC_PARAM_FEATURES		0
-- 
cgit v1.2.3


From a6d8c6e1a5c6fb982964861dc84c0c7cb0151c7c Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 18 Oct 2021 17:19:30 +0200
Subject: rtc: add correction parameter

Add a new parameter allowing the get and set the correction using ioctls
instead of just sysfs.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211018151933.76865-5-alexandre.belloni@bootlin.com
---
 include/uapi/linux/rtc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index c83bb9a4fa4f..5debe82439c2 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -136,6 +136,7 @@ struct rtc_param {
 
 /* parameter list */
 #define RTC_PARAM_FEATURES		0
+#define RTC_PARAM_CORRECTION		1
 
 #define RTC_MAX_FREQ	8192
 
-- 
cgit v1.2.3


From 0d20e9fb1262b1f9ac895b287db892bc75b05b84 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 18 Oct 2021 17:19:31 +0200
Subject: rtc: add BSM parameter

BSM or Backup Switch Mode is a common feature on RTCs, allowing to select
how the RTC will decide when to switch from its primary power supply to the
backup power supply. It is necessary to be able to set it from userspace as
there are uses cases where it has to be done dynamically.

Supported values are:
  RTC_BSM_DISABLED: disabled
  RTC_BSM_DIRECT: switching will happen as soon as Vbackup > Vdd
  RTC_BSM_LEVEL: switching will happen around a threshold, usually with an
  hysteresis
  RTC_BSM_STANDBY: switching will not happen until Vdd > Vbackup, this is
  useful to ensure the RTC doesn't draw any power until the device is first
  powered on.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211018151933.76865-6-alexandre.belloni@bootlin.com
---
 include/uapi/linux/rtc.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index 5debe82439c2..03e5b776e597 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -132,11 +132,18 @@ struct rtc_param {
 #define RTC_FEATURE_ALARM_RES_2S	3
 #define RTC_FEATURE_UPDATE_INTERRUPT	4
 #define RTC_FEATURE_CORRECTION		5
-#define RTC_FEATURE_CNT			6
+#define RTC_FEATURE_BACKUP_SWITCH_MODE	6
+#define RTC_FEATURE_CNT			7
 
 /* parameter list */
 #define RTC_PARAM_FEATURES		0
 #define RTC_PARAM_CORRECTION		1
+#define RTC_PARAM_BACKUP_SWITCH_MODE	2
+
+#define RTC_BSM_DISABLED	0
+#define RTC_BSM_DIRECT		1
+#define RTC_BSM_LEVEL		2
+#define RTC_BSM_STANDBY		3
 
 #define RTC_MAX_FREQ	8192
 
-- 
cgit v1.2.3


From c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Thu, 16 Sep 2021 18:15:35 +0000
Subject: KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK

Handling the migration of TSCs correctly is difficult, in part because
Linux does not provide userspace with the ability to retrieve a (TSC,
realtime) clock pair for a single instant in time. In lieu of a more
convenient facility, KVM can report similar information in the kvm_clock
structure.

Provide userspace with a host TSC & realtime pair iff the realtime clock
is based on the TSC. If userspace provides KVM_SET_CLOCK with a valid
realtime value, advance the KVM clock by the amount of elapsed time. Do
not step the KVM clock backwards, though, as it is a monotonic
oscillator.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210916181538.968978-5-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 322b4b588d75..5ca5ffe16cb4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1231,11 +1231,16 @@ struct kvm_irqfd {
 
 /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags.  */
 #define KVM_CLOCK_TSC_STABLE		2
+#define KVM_CLOCK_REALTIME		(1 << 2)
+#define KVM_CLOCK_HOST_TSC		(1 << 3)
 
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
-	__u32 pad[9];
+	__u32 pad0;
+	__u64 realtime;
+	__u64 host_tsc;
+	__u32 pad[4];
 };
 
 /* For KVM_CAP_SW_TLB */
-- 
cgit v1.2.3


From 3080ea5553cc909b000d1f1d964a9041962f2c5b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 9 Aug 2021 11:21:23 -0700
Subject: stddef: Introduce DECLARE_FLEX_ARRAY() helper

There are many places where kernel code wants to have several different
typed trailing flexible arrays. This would normally be done with multiple
flexible arrays in a union, but since GCC and Clang don't (on the surface)
allow this, there have been many open-coded workarounds, usually involving
neighboring 0-element arrays at the end of a structure. For example,
instead of something like this:

struct thing {
	...
	union {
		struct type1 foo[];
		struct type2 bar[];
	};
};

code works around the compiler with:

struct thing {
	...
	struct type1 foo[0];
	struct type2 bar[];
};

Another case is when a flexible array is wanted as the single member
within a struct (which itself is usually in a union). For example, this
would be worked around as:

union many {
	...
	struct {
		struct type3 baz[0];
	};
};

These kinds of work-arounds cause problems with size checks against such
zero-element arrays (for example when building with -Warray-bounds and
-Wzero-length-bounds, and with the coming FORTIFY_SOURCE improvements),
so they must all be converted to "real" flexible arrays, avoiding warnings
like this:

fs/hpfs/anode.c: In function 'hpfs_add_sector_to_btree':
fs/hpfs/anode.c:209:27: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct bplus_internal_node[0]' [-Wzero-length-bounds]
  209 |    anode->btree.u.internal[0].down = cpu_to_le32(a);
      |    ~~~~~~~~~~~~~~~~~~~~~~~^~~
In file included from fs/hpfs/hpfs_fn.h:26,
                 from fs/hpfs/anode.c:10:
fs/hpfs/hpfs.h:412:32: note: while referencing 'internal'
  412 |     struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
      |                                ^~~~~~~~

drivers/net/can/usb/etas_es58x/es58x_fd.c: In function 'es58x_fd_tx_can_msg':
drivers/net/can/usb/etas_es58x/es58x_fd.c:360:35: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[]'} [-Wzero-length-bounds]
  360 |  tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len];
      |                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/can/usb/etas_es58x/es58x_core.h:22,
                 from drivers/net/can/usb/etas_es58x/es58x_fd.c:17:
drivers/net/can/usb/etas_es58x/es58x_fd.h:231:6: note: while referencing 'raw_msg'
  231 |   u8 raw_msg[0];
      |      ^~~~~~~

However, it _is_ entirely possible to have one or more flexible arrays
in a struct or union: it just has to be in another struct. And since it
cannot be alone in a struct, such a struct must have at least 1 other
named member -- but that member can be zero sized. Wrap all this nonsense
into the new DECLARE_FLEX_ARRAY() in support of having flexible arrays
in unions (or alone in a struct).

As with struct_group(), since this is needed in UAPI headers as well,
implement the core there, with a non-UAPI wrapper.

Additionally update kernel-doc to understand its existence.

https://github.com/KSPP/linux/issues/137

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/stddef.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index 610204f7c275..3021ea25a284 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -25,3 +25,19 @@
 		struct { MEMBERS } ATTRS; \
 		struct TAG { MEMBERS } ATTRS NAME; \
 	}
+
+/**
+ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
+ *
+ * @TYPE: The type of each flexible array element
+ * @NAME: The name of the flexible array member
+ *
+ * In order to have a flexible array member in a union or alone in a
+ * struct, it needs to be wrapped in an anonymous struct with at least 1
+ * named member, but that member can be empty.
+ */
+#define __DECLARE_FLEX_ARRAY(TYPE, NAME)	\
+	struct { \
+		struct { } __empty_ ## NAME; \
+		TYPE NAME[]; \
+	}
-- 
cgit v1.2.3


From 47c662486cccf03e7062139d069b07ab0126ef59 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 13 Aug 2021 12:19:24 -0700
Subject: treewide: Replace 0-element memcpy() destinations with flexible
 arrays

The 0-element arrays that are used as memcpy() destinations are actually
flexible arrays. Adjust their structures accordingly so that memcpy()
can better reason able their destination size (i.e. they need to be seen
as "unknown" length rather than "zero").

In some cases, use of the DECLARE_FLEX_ARRAY() helper is needed when a
flexible array is alone in a struct.

Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Nilesh Javali <njavali@marvell.com>
Cc: Manish Rangankar <mrangankar@marvell.com>
Cc: GR-QLogic-Storage-Upstream@marvell.com
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Florian Schilhabel <florian.c.schilhabel@googlemail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Fabio Aiuto <fabioaiuto83@gmail.com>
Cc: Ross Schmidt <ross.schm.dev@gmail.com>
Cc: Marco Cesati <marcocesati@gmail.com>
Cc: ath10k@lists.infradead.org
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: linux-staging@lists.linux.dev
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/dlm_device.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h
index f880d2831160..e83954c69fff 100644
--- a/include/uapi/linux/dlm_device.h
+++ b/include/uapi/linux/dlm_device.h
@@ -45,13 +45,13 @@ struct dlm_lock_params {
 	void __user *bastaddr;
 	struct dlm_lksb __user *lksb;
 	char lvb[DLM_USER_LVB_LEN];
-	char name[0];
+	char name[];
 };
 
 struct dlm_lspace_params {
 	__u32 flags;
 	__u32 minor;
-	char name[0];
+	char name[];
 };
 
 struct dlm_purge_params {
-- 
cgit v1.2.3


From 223f903e9c832699f4e5f422281a60756c1c6cfe Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 12 Oct 2021 09:48:38 -0700
Subject: bpf: Rename BTF_KIND_TAG to BTF_KIND_DECL_TAG

Patch set [1] introduced BTF_KIND_TAG to allow tagging
declarations for struct/union, struct/union field, var, func
and func arguments and these tags will be encoded into
dwarf. They are also encoded to btf by llvm for the bpf target.

After BTF_KIND_TAG is introduced, we intended to use it
for kernel __user attributes. But kernel __user is actually
a type attribute. Upstream and internal discussion showed
it is not a good idea to mix declaration attribute and
type attribute. So we proposed to introduce btf_type_tag
as a type attribute and existing btf_tag renamed to
btf_decl_tag ([2]).

This patch renamed BTF_KIND_TAG to BTF_KIND_DECL_TAG and some
other declarations with *_tag to *_decl_tag to make it clear
the tag is for declaration. In the future, BTF_KIND_TYPE_TAG
might be introduced per [3].

 [1] https://lore.kernel.org/bpf/20210914223004.244411-1-yhs@fb.com/
 [2] https://reviews.llvm.org/D111588
 [3] https://reviews.llvm.org/D111199

Fixes: b5ea834dde6b ("bpf: Support for new btf kind BTF_KIND_TAG")
Fixes: 5b84bd10363e ("libbpf: Add support for BTF_KIND_TAG")
Fixes: 5c07f2fec003 ("bpftool: Add support for BTF_KIND_TAG")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211012164838.3345699-1-yhs@fb.com
---
 include/uapi/linux/btf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 642b6ecb37d7..deb12f755f0f 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO, VAR and TAG.
+	 * FUNC, FUNC_PROTO, VAR and DECL_TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -74,7 +74,7 @@ enum {
 	BTF_KIND_VAR		= 14,	/* Variable	*/
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
-	BTF_KIND_TAG		= 17,	/* Tag */
+	BTF_KIND_DECL_TAG	= 17,	/* Decl Tag */
 
 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -174,14 +174,14 @@ struct btf_var_secinfo {
 	__u32	size;
 };
 
-/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe
+/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe
  * additional information related to the tag applied location.
  * If component_idx == -1, the tag is applied to a struct, union,
  * variable or function. Otherwise, it is applied to a struct/union
  * member or a func argument, and component_idx indicates which member
  * or argument (0 ... vlen-1).
  */
-struct btf_tag {
+struct btf_decl_tag {
        __s32   component_idx;
 };
 
-- 
cgit v1.2.3


From 6224590d242fc8c6b26941328e02a40b4384949b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 2 Oct 2021 19:36:14 +0100
Subject: io_uring: add flag to not fail link after timeout

For some reason non-off IORING_OP_TIMEOUT always fails links, it's
pretty inconvenient and unnecessary limits chaining after it to hard
linking, which is far from ideal, e.g. doesn't pair well with timeout
cancellation. Add a flag forcing it to not fail links on -ETIME.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/17c7ec0fb7a6113cc6be8cdaedcada0ba836ac0e.1633199723.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b270a07b285e..c45b5e9a9387 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -158,6 +158,7 @@ enum {
 #define IORING_TIMEOUT_BOOTTIME		(1U << 2)
 #define IORING_TIMEOUT_REALTIME		(1U << 3)
 #define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
+#define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
 #define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
 #define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
-- 
cgit v1.2.3


From f4c6217f7f5936f7173d028559ff5d25cce10816 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Wed, 6 Oct 2021 19:36:51 +0530
Subject: perf: Add comment about current state of PERF_MEM_LVL_* namespace and
 remove an extra line

Add a comment about PERF_MEM_LVL_* namespace being depricated
to some extent in favour of added PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_}
fields.

Remove an extra line present in perf_mem__lvl_scnprintf function.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211006140654.298352-2-kjain@linux.ibm.com
---
 include/uapi/linux/perf_event.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c89535de1ec8..a74538c5b4b5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1256,7 +1256,13 @@ union perf_mem_data_src {
 #define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
 #define PERF_MEM_OP_SHIFT	0
 
-/* memory hierarchy (memory level, hit or miss) */
+/*
+ * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
+ * Supporting this namespace inorder to not break defined ABIs.
+ *
+ * memory hierarchy (memory level, hit or miss)
+ */
 #define PERF_MEM_LVL_NA		0x01  /* not available */
 #define PERF_MEM_LVL_HIT	0x02  /* hit level */
 #define PERF_MEM_LVL_MISS	0x04  /* miss level  */
-- 
cgit v1.2.3


From fec9cc6175d0ec1e13efe12be491d9bd4de62f80 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Wed, 6 Oct 2021 19:36:52 +0530
Subject: perf: Add mem_hops field in perf_mem_data_src structure

Going forward, future generation systems can have more hierarchy
within the node/package level but currently we don't have any data source
encoding field in perf, which can be used to represent this level of data.

Add a new field called 'mem_hops' in the perf_mem_data_src structure
which can be used to represent intra-node/package or inter-node/off-package
details. This field is of size 3 bits where PERF_MEM_HOPS_{NA, 0..6} value
can be used to present different hop levels data.

Also add corresponding macros to define mem_hop field values
and shift value.

Currently we define macro for HOPS_0 which corresponds
to data coming from another core but same node.

For ex: Encodings for mem_hops fields with L2 cache:

L2			- local L2
L2 | REMOTE | HOPS_0	- remote core, same node L2

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211006140654.298352-3-kjain@linux.ibm.com
---
 include/uapi/linux/perf_event.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index a74538c5b4b5..bd8860eeb291 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1225,14 +1225,16 @@ union perf_mem_data_src {
 			mem_remote:1,   /* remote */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_blk:3,	/* access blocked */
-			mem_rsvd:21;
+			mem_hops:3,	/* hop level */
+			mem_rsvd:18;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:21,
+		__u64	mem_rsvd:18,
+			mem_hops:3,	/* hop level */
 			mem_blk:3,	/* access blocked */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_remote:1,   /* remote */
@@ -1328,6 +1330,11 @@ union perf_mem_data_src {
 #define PERF_MEM_BLK_ADDR	0x04 /* address conflict */
 #define PERF_MEM_BLK_SHIFT	40
 
+/* hop level */
+#define PERF_MEM_HOPS_0		0x01 /* remote core, same node */
+/* 2-7 available */
+#define PERF_MEM_HOPS_SHIFT	43
+
 #define PERF_MEM_S(a, s) \
 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
 
-- 
cgit v1.2.3


From 0a2b3e363566c4cc8792d37c5e73b9d9295e075c Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 20 Oct 2021 22:38:06 +0800
Subject: bcache: reserve never used bits from bkey.high

There sre 3 bits in member high of struct bkey are never used, and no
plan to support them in future,
- HEADER_SIZE, start at bit 58, length 2 bits
- KEY_PINNED,  start at bit 55, length 1 bit

No any kernel code, or user space tool references or accesses the three
bits. Therefore it is possible and feasible to reserve the valuable bits
from bkey.high. They can be used in future for other purpose.

Signed-off-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20211020143812.6403-3-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/bcache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index cf7399f03b71..97413586195b 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -43,9 +43,9 @@ static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v)	\
 #define KEY_MAX_U64S		8
 
 KEY_FIELD(KEY_PTRS,	high, 60, 3)
-KEY_FIELD(HEADER_SIZE,	high, 58, 2)
+KEY_FIELD(__PAD0,	high, 58, 2)
 KEY_FIELD(KEY_CSUM,	high, 56, 2)
-KEY_FIELD(KEY_PINNED,	high, 55, 1)
+KEY_FIELD(__PAD1,	high, 55, 1)
 KEY_FIELD(KEY_DIRTY,	high, 36, 1)
 
 KEY_FIELD(KEY_SIZE,	high, 20, KEY_SIZE_BITS)
-- 
cgit v1.2.3


From 7aea2c0b48a568e6732c1d30516febe36bf555f1 Mon Sep 17 00:00:00 2001
From: Michael Tretter <m.tretter@pengutronix.de>
Date: Wed, 8 Sep 2021 14:03:15 +0100
Subject: media: allegro: add control to disable encoder buffer

The encoder buffer can have a negative impact on the quality of the
encoded video.

Add a control to allow user space to disable the encoder buffer per
channel if the VCU supports the encoder buffer but the quality is not
sufficient.

Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 133e20444939..5fea5feb0412 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -211,6 +211,11 @@ enum v4l2_colorfx {
  * We reserve 128 controls for this driver.
  */
 #define V4L2_CID_USER_CCS_BASE			(V4L2_CID_USER_BASE + 0x10f0)
+/*
+ * The base for Allegro driver controls.
+ * We reserve 16 controls for this driver.
+ */
+#define V4L2_CID_USER_ALLEGRO_BASE		(V4L2_CID_USER_BASE + 0x1170)
 
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
-- 
cgit v1.2.3


From dfcb63ce1de6b10ba98dee928f9463f37e5a5512 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Tue, 19 Oct 2021 19:47:09 +0200
Subject: fq_codel: generalise ce_threshold marking for subset of traffic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit e72aeb9ee0e3 ("fq_codel: implement L4S style ce_threshold_ect1
marking") expanded the ce_threshold feature of FQ-CoDel so it can
be applied to a subset of the traffic, using the ECT(1) bit of the ECN
field as the classifier. However, hard-coding ECT(1) as the only
classifier for this feature seems limiting, so let's expand it to be more
general.

To this end, change the parameter from a ce_threshold_ect1 boolean, to a
one-byte selector/mask pair (ce_threshold_{selector,mask}) which is applied
to the whole diffserv/ECN field in the IP header. This makes it possible to
classify packets by any value in either the ECN field or the diffserv
field. In particular, setting a selector of INET_ECN_ECT_1 and a mask of
INET_ECN_MASK corresponds to the functionality before this patch, and a
mask of ~INET_ECN_MASK allows using the selector as a straight-forward
match against a diffserv code point:

 # apply ce_threshold to ECT(1) traffic
 tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x1/0x3

 # apply ce_threshold to ECN-capable traffic marked as diffserv AF22
 tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x50/0xfc

Regardless of the selector chosen, the normal rules for ECN-marking of
packets still apply, i.e., the flow must still declare itself ECN-capable
by setting one of the bits in the ECN field to get marked at all.

v2:
- Add tc usage examples to patch description

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20211019174709.69081-1-toke@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/pkt_sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 6be9a84cccfa..f292b467b27f 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -840,7 +840,8 @@ enum {
 	TCA_FQ_CODEL_CE_THRESHOLD,
 	TCA_FQ_CODEL_DROP_BATCH_SIZE,
 	TCA_FQ_CODEL_MEMORY_LIMIT,
-	TCA_FQ_CODEL_CE_THRESHOLD_ECT1,
+	TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
+	TCA_FQ_CODEL_CE_THRESHOLD_MASK,
 	__TCA_FQ_CODEL_MAX
 };
 
-- 
cgit v1.2.3


From 1add667da24273631d4b1f5529789ec5253227f6 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 20 Oct 2021 08:11:47 +0300
Subject: nl80211: vendor-cmd: intel: add more details for
 IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP

Explain more the expected flow for this command.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20211020051147.29297-1-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211-vnd-intel.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h
index 0bf177b84fd9..4ed7d0b24512 100644
--- a/include/uapi/linux/nl80211-vnd-intel.h
+++ b/include/uapi/linux/nl80211-vnd-intel.h
@@ -13,6 +13,35 @@
  * enum iwl_mvm_vendor_cmd - supported vendor commands
  * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info.
  * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device.
+ *	This is useful when the CSME firmware owns the device and the kernel
+ *	wants to use it. In case the CSME firmware has no connection active the
+ *	kernel will manage on its own to get ownership of the device.
+ *	When the CSME firmware has an active connection, the user space
+ *	involvement is required. The kernel will assert the RFKILL signal with
+ *	the "device not owned" reason so that nobody can touch the device. Then
+ *	the user space can run the following flow to be able to get connected
+ *	to the very same AP the CSME firmware is currently connected to:
+ *
+ *	1) The user space (NetworkManager) boots and sees that the device is
+ *	    in RFKILL because the host doesn't own the device
+ *	2) The user space asks the kernel what AP the CSME firmware is
+ *	   connected to (with %IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO)
+ *	3) The user space checks if it has a profile that matches the reply
+ *	   from the CSME firmware
+ *	4) The user space installs a network to the wpa_supplicant with a
+ *	   specific BSSID and a specific frequency
+ *	5) The user space prevents any type of full scan
+ *	6) The user space asks iwlmei to request ownership on the device (with
+ *	   this command)
+ *	7) iwlmei requests ownership from the CSME firmware
+ *	8) The CSME firmware grants ownership
+ *	9) iwlmei tells iwlwifi to lift the RFKILL
+ *	10) RFKILL OFF is reported to user space
+ *	11) The host boots the device, loads the firwmare, and connects to a
+ *	    specific BSSID without scanning including IP as fast as it can
+ *	12) The host reports to the CSME firmware that there is a connection
+ *	13) The TCP connection is preserved and the host has connectivity
+ *
  * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed.
  *	It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a
  *	&IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes.
-- 
cgit v1.2.3


From 63fa04266629b9559d66c4dc18b03e0f9fc04a02 Mon Sep 17 00:00:00 2001
From: Srinivasan Raju <srini.raju@purelifi.com>
Date: Mon, 18 Oct 2021 11:00:54 +0100
Subject: nl80211: Add LC placeholder band definition to nl80211_band

Define LC band which is a draft under IEEE 802.11bb.
Current NL80211_BAND_LC is a placeholder band and
will be more defined IEEE 802.11bb progresses.

Signed-off-by: Srinivasan Raju <srini.raju@purelifi.com>
Link: https://lore.kernel.org/r/20211018100143.7565-2-srini.raju@purelifi.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index eda608b1eb09..6b816ef0155f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4978,6 +4978,7 @@ enum nl80211_txrate_gi {
  * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz)
  * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz)
  * @NL80211_BAND_S1GHZ: around 900MHz, supported by S1G PHYs
+ * @NL80211_BAND_LC: light communication band (placeholder)
  * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace
  *	since newer kernel versions may support more bands
  */
@@ -4987,6 +4988,7 @@ enum nl80211_band {
 	NL80211_BAND_60GHZ,
 	NL80211_BAND_6GHZ,
 	NL80211_BAND_S1GHZ,
+	NL80211_BAND_LC,
 
 	NUM_NL80211_BANDS,
 };
-- 
cgit v1.2.3


From f9d366d420af4ce8719c59e60853573c02831f61 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 21 Oct 2021 17:30:39 +0200
Subject: cfg80211: fix kernel-doc for MBSSID EMA

The struct member ema_max_profile_periodicity was listed
with the wrong name in the kernel-doc, fix that.

Link: https://lore.kernel.org/r/20211021173038.18ec2030c66b.Iac731bb299525940948adad2c41f514b7dd81c47@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6b816ef0155f..61cab81e920d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -7424,7 +7424,7 @@ enum nl80211_sar_specs_attrs {
  * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel
  *	to advertise the maximum profile periodicity supported by the driver
  *	if EMA is enabled. Driver should indicate EMA support to the userspace
- *	by setting wiphy->mbssid_max_ema_profile_periodicity to
+ *	by setting wiphy->ema_max_profile_periodicity to
  *	a non-zero value.
  *
  * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of
@@ -7443,7 +7443,7 @@ enum nl80211_sar_specs_attrs {
  *
  * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature.
  *	Setting this flag is permitted only if the driver advertises EMA support
- *	by setting wiphy->mbssid_max_ema_profile_periodicity to non-zero.
+ *	by setting wiphy->ema_max_profile_periodicity to non-zero.
  *
  * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal
  * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute
-- 
cgit v1.2.3


From c353d7ce76bfbb87a89155616916cea1f0f1e78e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 19 Oct 2021 16:45:00 +0200
Subject: uapi: Add <linux/map_to_14segment.h>

Add a header file providing translation primitives and tables for the
conversion of (ASCII) characters to a 14-segments notation, as used by
14-segment alphanumeric displays.

This follows the spirit of include/uapi/linux/map_to_7segment.h.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 include/uapi/linux/map_to_14segment.h | 241 ++++++++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)
 create mode 100644 include/uapi/linux/map_to_14segment.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/map_to_14segment.h b/include/uapi/linux/map_to_14segment.h
new file mode 100644
index 000000000000..0346ef76543b
--- /dev/null
+++ b/include/uapi/linux/map_to_14segment.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2021 Glider bv
+ *
+ * Based on include/uapi/linux/map_to_7segment.h:
+
+ * Copyright (c) 2005 Henk Vergonet <Henk.Vergonet@gmail.com>
+ */
+
+#ifndef MAP_TO_14SEGMENT_H
+#define MAP_TO_14SEGMENT_H
+
+/* This file provides translation primitives and tables for the conversion
+ * of (ASCII) characters to a 14-segments notation.
+ *
+ * The 14 segment's wikipedia notation below is used as standard.
+ * See: https://en.wikipedia.org/wiki/Fourteen-segment_display
+ *
+ * Notation:	+---a---+
+ *		|\  |  /|
+ *		f h i j b
+ *		|  \|/  |
+ *		+-g1+-g2+
+ *		|  /|\  |
+ *		e k l m c
+ *		|/  |  \|
+ *		+---d---+
+ *
+ * Usage:
+ *
+ *   Register a map variable, and fill it with a character set:
+ *	static SEG14_DEFAULT_MAP(map_seg14);
+ *
+ *
+ *   Then use for conversion:
+ *	seg14 = map_to_seg14(&map_seg14, some_char);
+ *	...
+ *
+ * In device drivers it is recommended, if required, to make the char map
+ * accessible via the sysfs interface using the following scheme:
+ *
+ * static ssize_t map_seg14_show(struct device *dev,
+ *				 struct device_attribute *attr, char *buf)
+ * {
+ *	memcpy(buf, &map_seg14, sizeof(map_seg14));
+ *	return sizeof(map_seg14);
+ * }
+ * static ssize_t map_seg14_store(struct device *dev,
+ *				  struct device_attribute *attr,
+ *				  const char *buf, size_t cnt)
+ * {
+ *	if (cnt != sizeof(map_seg14))
+ *		return -EINVAL;
+ *	memcpy(&map_seg14, buf, cnt);
+ *	return cnt;
+ * }
+ * static DEVICE_ATTR_RW(map_seg14);
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+
+#include <asm/byteorder.h>
+
+#define BIT_SEG14_A		0
+#define BIT_SEG14_B		1
+#define BIT_SEG14_C		2
+#define BIT_SEG14_D		3
+#define BIT_SEG14_E		4
+#define BIT_SEG14_F		5
+#define BIT_SEG14_G1		6
+#define BIT_SEG14_G2		7
+#define BIT_SEG14_H		8
+#define BIT_SEG14_I		9
+#define BIT_SEG14_J		10
+#define BIT_SEG14_K		11
+#define BIT_SEG14_L		12
+#define BIT_SEG14_M		13
+#define BIT_SEG14_RESERVED1	14
+#define BIT_SEG14_RESERVED2	15
+
+struct seg14_conversion_map {
+	__be16 table[128];
+};
+
+static __inline__ int map_to_seg14(struct seg14_conversion_map *map, int c)
+{
+	if (c < 0 || c >= sizeof(map->table) / sizeof(map->table[0]))
+		return -EINVAL;
+
+	return __be16_to_cpu(map->table[c]);
+}
+
+#define SEG14_CONVERSION_MAP(_name, _map)	\
+	struct seg14_conversion_map _name = { .table = { _map } }
+
+/*
+ * It is recommended to use a facility that allows user space to redefine
+ * custom character sets for LCD devices. Please use a sysfs interface
+ * as described above.
+ */
+#define MAP_TO_SEG14_SYSFS_FILE	"map_seg14"
+
+/*******************************************************************************
+ * ASCII conversion table
+ ******************************************************************************/
+
+#define _SEG14(sym, a, b, c, d, e, f, g1, g2, h, j, k, l, m, n)	\
+	__cpu_to_be16( a << BIT_SEG14_A  |  b << BIT_SEG14_B  |	\
+		       c << BIT_SEG14_C  |  d << BIT_SEG14_D  |	\
+		       e << BIT_SEG14_E  |  f << BIT_SEG14_F  |	\
+		      g1 << BIT_SEG14_G1 | g2 << BIT_SEG14_G2 |	\
+		       h << BIT_SEG14_H  |  j << BIT_SEG14_I  |	\
+		       k << BIT_SEG14_J  |  l << BIT_SEG14_K  |	\
+		       m << BIT_SEG14_L  |  n << BIT_SEG14_M )
+
+#define _MAP_0_32_ASCII_SEG14_NON_PRINTABLE				\
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+
+#define _MAP_33_47_ASCII_SEG14_SYMBOL				\
+	_SEG14('!', 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('"', 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0),	\
+	_SEG14('#', 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('$', 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('%', 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0),	\
+	_SEG14('&', 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1),	\
+	_SEG14('\'',0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0),	\
+	_SEG14('(', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1),	\
+	_SEG14(')', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0),	\
+	_SEG14('*', 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1),	\
+	_SEG14('+', 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0),	\
+	_SEG14(',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),	\
+	_SEG14('-', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('.', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),	\
+	_SEG14('/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0),
+
+#define _MAP_48_57_ASCII_SEG14_NUMERIC				\
+	_SEG14('0', 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0),	\
+	_SEG14('1', 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0),	\
+	_SEG14('2', 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('3', 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('4', 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('5', 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1),	\
+	_SEG14('6', 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('7', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0),	\
+	_SEG14('8', 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('9', 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0),
+
+#define _MAP_58_64_ASCII_SEG14_SYMBOL				\
+	_SEG14(':', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0),	\
+	_SEG14(';', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0),	\
+	_SEG14('<', 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1),	\
+	_SEG14('=', 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('>', 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0),	\
+	_SEG14('?', 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0),	\
+	_SEG14('@', 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0),
+
+#define _MAP_65_90_ASCII_SEG14_ALPHA_UPPER			\
+	_SEG14('A', 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('B', 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('C', 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('D', 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('E', 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('F', 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('G', 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('H', 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('I', 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('J', 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('K', 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1),	\
+	_SEG14('L', 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('M', 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0),	\
+	_SEG14('N', 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1),	\
+	_SEG14('O', 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('P', 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('Q', 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1),	\
+	_SEG14('R', 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1),	\
+	_SEG14('S', 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('T', 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('U', 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('V', 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0),	\
+	_SEG14('W', 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1),	\
+	_SEG14('X', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1),	\
+	_SEG14('Y', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0),	\
+	_SEG14('Z', 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0),
+
+#define _MAP_91_96_ASCII_SEG14_SYMBOL				\
+	_SEG14('[', 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('\\',0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1),	\
+	_SEG14(']', 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('^', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1),	\
+	_SEG14('_', 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('`', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),
+
+#define _MAP_97_122_ASCII_SEG14_ALPHA_LOWER			\
+	_SEG14('a', 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0),	\
+	_SEG14('b', 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1),	\
+	_SEG14('c', 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('d', 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0),	\
+	_SEG14('e', 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0),	\
+	_SEG14('f', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0),	\
+	_SEG14('g', 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0),	\
+	_SEG14('h', 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0),	\
+	_SEG14('i', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0),	\
+	_SEG14('j', 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0),	\
+	_SEG14('k', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1),	\
+	_SEG14('l', 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('m', 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0),	\
+	_SEG14('n', 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0),	\
+	_SEG14('o', 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('p', 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0),	\
+	_SEG14('q', 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0),	\
+	_SEG14('r', 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('s', 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1),	\
+	_SEG14('t', 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('u', 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),	\
+	_SEG14('v', 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0),	\
+	_SEG14('w', 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1),	\
+	_SEG14('x', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1),	\
+	_SEG14('y', 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0),	\
+	_SEG14('z', 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0),
+
+#define _MAP_123_126_ASCII_SEG14_SYMBOL				\
+	_SEG14('{', 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0),	\
+	_SEG14('|', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0),	\
+	_SEG14('}', 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1),	\
+	_SEG14('~', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0),
+
+/* Maps */
+#define MAP_ASCII14SEG_ALPHANUM			\
+	_MAP_0_32_ASCII_SEG14_NON_PRINTABLE	\
+	_MAP_33_47_ASCII_SEG14_SYMBOL		\
+	_MAP_48_57_ASCII_SEG14_NUMERIC		\
+	_MAP_58_64_ASCII_SEG14_SYMBOL		\
+	_MAP_65_90_ASCII_SEG14_ALPHA_UPPER	\
+	_MAP_91_96_ASCII_SEG14_SYMBOL		\
+	_MAP_97_122_ASCII_SEG14_ALPHA_LOWER	\
+	_MAP_123_126_ASCII_SEG14_SYMBOL
+
+#define SEG14_DEFAULT_MAP(_name)		\
+	SEG14_CONVERSION_MAP(_name, MAP_ASCII14SEG_ALPHANUM)
+
+#endif	/* MAP_TO_14SEGMENT_H */
-- 
cgit v1.2.3


From 9eeb3aa33ae005526f672b394c1791578463513f Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Thu, 21 Oct 2021 21:47:51 +0800
Subject: bpf: Add bpf_skc_to_unix_sock() helper

The helper is used in tracing programs to cast a socket
pointer to a unix_sock pointer.
The return value could be NULL if the casting is illegal.

Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211021134752.1223426-2-hengqi.chen@gmail.com
---
 include/uapi/linux/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..22e7a3f38b9f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4909,6 +4909,12 @@ union bpf_attr {
  *	Return
  *		The number of bytes written to the buffer, or a negative error
  *		in case of failure.
+ *
+ * struct unix_sock *bpf_skc_to_unix_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or **NULL** otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5089,6 +5095,7 @@ union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(skc_to_unix_sock),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From aba64c7da98330141dcdadd5612f088043a83696 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Wed, 20 Oct 2021 00:48:17 -0700
Subject: bpf: Add verified_insns to bpf_prog_info and fdinfo

This stat is currently printed in the verifier log and not stored
anywhere. To ease consumption of this data, add a field to bpf_prog_aux
so it can be exposed via BPF_OBJ_GET_INFO_BY_FD and fdinfo.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20211020074818.1017682-2-davemarchevsky@fb.com
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 22e7a3f38b9f..c10820037883 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5620,6 +5620,7 @@ struct bpf_prog_info {
 	__u64 run_time_ns;
 	__u64 run_cnt;
 	__u64 recursion_misses;
+	__u32 verified_insns;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
cgit v1.2.3


From 63dfe0709643528290c8a6825f278eda0e3f3c2e Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:32 +0900
Subject: can: bittiming: allow TDC{V,O} to be zero and add
 can_tdc_const::tdc{v,o,f}_min

ISO 11898-1 specifies in section 11.3.3 "Transmitter delay
compensation" that "the configuration range for [the] SSP position
shall be at least 0 to 63 minimum time quanta."

Because SSP = TDCV + TDCO, it means that we should allow both TDCV and
TDCO to hold zero value in order to honor SSP's minimum possible
value.

However, current implementation assigned special meaning to TDCV and
TDCO's zero values:
  * TDCV = 0 -> TDCV is automatically measured by the transceiver.
  * TDCO = 0 -> TDC is off.

In order to allow for those values to really be zero and to maintain
current features, we introduce two new flags:
  * CAN_CTRLMODE_TDC_AUTO indicates that the controller support
    automatic measurement of TDCV.
  * CAN_CTRLMODE_TDC_MANUAL indicates that the controller support
    manual configuration of TDCV. N.B.: current implementation failed
    to provide an option for the driver to indicate that only manual
    mode was supported.

TDC is disabled if both CAN_CTRLMODE_TDC_AUTO and
CAN_CTRLMODE_TDC_MANUAL flags are off, c.f. the helper function
can_tdc_is_enabled() which is also introduced in this patch.

Also, this patch adds three fields: tdcv_min, tdco_min and tdcf_min to
struct can_tdc_const. While we are not convinced that those three
fields could be anything else than zero, we can imagine that some
controllers might specify a lower bound on these. Thus, those minimums
are really added "just in case".

Comments of struct can_tdc and can_tdc_const are updated accordingly.

Finally, the changes are applied to the etas_es58x driver.

Link: https://lore.kernel.org/all/20210918095637.20108-2-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/netlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index f730d443b918..004cd09a7d49 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -101,6 +101,8 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_PRESUME_ACK	0x40	/* Ignore missing CAN ACKs */
 #define CAN_CTRLMODE_FD_NON_ISO		0x80	/* CAN FD in non-ISO mode */
 #define CAN_CTRLMODE_CC_LEN8_DLC	0x100	/* Classic CAN DLC option */
+#define CAN_CTRLMODE_TDC_AUTO		0x200	/* CAN transiver automatically calculates TDCV */
+#define CAN_CTRLMODE_TDC_MANUAL		0x400	/* TDCV is manually set up by user */
 
 /*
  * CAN device statistics
-- 
cgit v1.2.3


From d99755f71a80df33b981484f0d3bb956ed15a247 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:35 +0900
Subject: can: netlink: add interface for CAN-FD Transmitter Delay Compensation
 (TDC)

Add the netlink interface for TDC parameters of struct can_tdc_const
and can_tdc.

Contrary to the can_bittiming(_const) structures for which there is
just a single IFLA_CAN(_DATA)_BITTMING(_CONST) entry per structure,
here, we create a nested entry IFLA_CAN_TDC. Within this nested entry,
additional IFLA_CAN_TDC_TDC* entries are added for each of the TDC
parameters of the newly introduced struct can_tdc_const and struct
can_tdc.

For struct can_tdc_const, these are:
        IFLA_CAN_TDC_TDCV_MIN
        IFLA_CAN_TDC_TDCV_MAX
        IFLA_CAN_TDC_TDCO_MIN
        IFLA_CAN_TDC_TDCO_MAX
        IFLA_CAN_TDC_TDCF_MIN
        IFLA_CAN_TDC_TDCF_MAX

For struct can_tdc, these are:
        IFLA_CAN_TDC_TDCV
        IFLA_CAN_TDC_TDCO
        IFLA_CAN_TDC_TDCF

This is done so that changes can be applied in the future to the
structures without breaking the netlink interface.

The TDC netlink logic works as follow:

 * CAN_CTRLMODE_FD is not provided:
    - if any TDC parameters are provided: error.

    - TDC parameters not provided: TDC parameters unchanged.

 * CAN_CTRLMODE_FD is provided and is false:
     - TDC is deactivated: both the structure and the
       CAN_CTRLMODE_TDC_{AUTO,MANUAL} flags are flushed.

 * CAN_CTRLMODE_FD provided and is true:
    - CAN_CTRLMODE_TDC_{AUTO,MANUAL} and tdc{v,o,f} not provided: call
      can_calc_tdco() to automatically decide whether TDC should be
      activated and, if so, set CAN_CTRLMODE_TDC_AUTO and uses the
      calculated tdco value.

    - CAN_CTRLMODE_TDC_AUTO and tdco provided: set
      CAN_CTRLMODE_TDC_AUTO and use the provided tdco value. Here,
      tdcv is illegal and tdcf is optional.

    - CAN_CTRLMODE_TDC_MANUAL and both of tdcv and tdco provided: set
      CAN_CTRLMODE_TDC_MANUAL and use the provided tdcv and tdco
      value. Here, tdcf is optional.

    - CAN_CTRLMODE_TDC_{AUTO,MANUAL} are mutually exclusive. Whenever
      one flag is turned on, the other will automatically be turned
      off. Providing both returns an error.

    - Combination other than the one listed above are illegal and will
      return an error.

N.B. above rules mean that whenever CAN_CTRLMODE_FD is provided, the
previous TDC values will be overwritten. The only option to reuse
previous TDC value is to not provide CAN_CTRLMODE_FD.

All the new parameters are defined as u32. This arbitrary choice is
done to mimic the other bittiming values with are also all of type
u32. An u16 would have been sufficient to hold the TDC values.

This patch completes below series (c.f. [1]):
  - commit 289ea9e4ae59 ("can: add new CAN FD bittiming parameters:
    Transmitter Delay Compensation (TDC)")
  - commit c25cc7993243 ("can: bittiming: add calculation for CAN FD
    Transmitter Delay Compensation (TDC)")

[1] https://lore.kernel.org/linux-can/20210224002008.4158-1-mailhol.vincent@wanadoo.fr/T/#t

Link: https://lore.kernel.org/all/20210918095637.20108-5-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/netlink.h | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 004cd09a7d49..75b85c60efb2 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -136,10 +136,35 @@ enum {
 	IFLA_CAN_BITRATE_CONST,
 	IFLA_CAN_DATA_BITRATE_CONST,
 	IFLA_CAN_BITRATE_MAX,
-	__IFLA_CAN_MAX
+	IFLA_CAN_TDC,
+
+	/* add new constants above here */
+	__IFLA_CAN_MAX,
+	IFLA_CAN_MAX = __IFLA_CAN_MAX - 1
 };
 
-#define IFLA_CAN_MAX	(__IFLA_CAN_MAX - 1)
+/*
+ * CAN FD Transmitter Delay Compensation (TDC)
+ *
+ * Please refer to struct can_tdc_const and can_tdc in
+ * include/linux/can/bittiming.h for further details.
+ */
+enum {
+	IFLA_CAN_TDC_UNSPEC,
+	IFLA_CAN_TDC_TDCV_MIN,	/* u32 */
+	IFLA_CAN_TDC_TDCV_MAX,	/* u32 */
+	IFLA_CAN_TDC_TDCO_MIN,	/* u32 */
+	IFLA_CAN_TDC_TDCO_MAX,	/* u32 */
+	IFLA_CAN_TDC_TDCF_MIN,	/* u32 */
+	IFLA_CAN_TDC_TDCF_MAX,	/* u32 */
+	IFLA_CAN_TDC_TDCV,	/* u32 */
+	IFLA_CAN_TDC_TDCO,	/* u32 */
+	IFLA_CAN_TDC_TDCF,	/* u32 */
+
+	/* add new constants above here */
+	__IFLA_CAN_TDC,
+	IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1
+};
 
 /* u16 termination range: 1..65535 Ohms */
 #define CAN_TERMINATION_DISABLED 0
-- 
cgit v1.2.3


From a9d496d8e08ca1eb43d14cb734839b24ab0e8083 Mon Sep 17 00:00:00 2001
From: David Edmondson <david.edmondson@oracle.com>
Date: Mon, 20 Sep 2021 11:37:34 +0100
Subject: KVM: x86: Clarify the kvm_run.emulation_failure structure layout

Until more flags for kvm_run.emulation_failure flags are defined, it
is undetermined whether new payload elements corresponding to those
flags will be additive or alternative. As a hint to userspace that an
alternative is possible, wrap the current payload elements in a union.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210920103737.2696756-2-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5ca5ffe16cb4..2c8aa8d4dac1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -403,8 +403,12 @@ struct kvm_run {
 			__u32 suberror;
 			__u32 ndata;
 			__u64 flags;
-			__u8  insn_size;
-			__u8  insn_bytes[15];
+			union {
+				struct {
+					__u8  insn_size;
+					__u8  insn_bytes[15];
+				};
+			};
 		} emulation_failure;
 		/* KVM_EXIT_OSI */
 		struct {
-- 
cgit v1.2.3


From e615e355894e619785af81479ad6f5a05a8a2e3f Mon Sep 17 00:00:00 2001
From: David Edmondson <david.edmondson@oracle.com>
Date: Mon, 20 Sep 2021 11:37:36 +0100
Subject: KVM: x86: On emulation failure, convey the exit reason, etc. to
 userspace

Should instruction emulation fail, include the VM exit reason, etc. in
the emulation_failure data passed to userspace, in order that the VMM
can report it as a debugging aid when describing the failure.

Suggested-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210920103737.2696756-4-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 2c8aa8d4dac1..78f0719cc2a3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -398,6 +398,11 @@ struct kvm_run {
 		 * "ndata" is correct, that new fields are enumerated in "flags",
 		 * and that each flag enumerates fields that are 64-bit aligned
 		 * and sized (so that ndata+internal.data[] is valid/accurate).
+		 *
+		 * Space beyond the defined fields may be used to store arbitrary
+		 * debug information relating to the emulation failure. It is
+		 * accounted for in "ndata" but the format is unspecified and is
+		 * not represented in "flags". Any such information is *not* ABI!
 		 */
 		struct {
 			__u32 suberror;
@@ -409,6 +414,7 @@ struct kvm_run {
 					__u8  insn_bytes[15];
 				};
 			};
+			/* Arbitrary debug data may follow. */
 		} emulation_failure;
 		/* KVM_EXIT_OSI */
 		struct {
-- 
cgit v1.2.3


From 1cf4e9a6fbdbc9850216a2a6d8ed52888679a077 Mon Sep 17 00:00:00 2001
From: Luo Jie <luoj@codeaurora.org>
Date: Sun, 24 Oct 2021 16:27:33 +0800
Subject: net: phy: add constants for fast retrain related register

Add the constants for 2.5G fast retrain capability
in 10G AN control register, fast retrain status and
control register and THP bypass register into mdio.h.

Signed-off-by: Luo Jie <luoj@codeaurora.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mdio.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index bdf77dffa5a4..c54e6eae5366 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -53,12 +53,14 @@
 #define MDIO_AN_EEE_LPABLE	61	/* EEE link partner ability */
 #define MDIO_AN_EEE_ADV2	62	/* EEE advertisement 2 */
 #define MDIO_AN_EEE_LPABLE2	63	/* EEE link partner ability 2 */
+#define MDIO_AN_CTRL2		64	/* AN THP bypass request control */
 
 /* Media-dependent registers. */
 #define MDIO_PMA_10GBT_SWAPPOL	130	/* 10GBASE-T pair swap & polarity */
 #define MDIO_PMA_10GBT_TXPWR	131	/* 10GBASE-T TX power control */
 #define MDIO_PMA_10GBT_SNR	133	/* 10GBASE-T SNR margin, lane A.
 					 * Lanes B-D are numbered 134-136. */
+#define MDIO_PMA_10GBR_FSRT_CSR	147	/* 10GBASE-R fast retrain status and control */
 #define MDIO_PMA_10GBR_FECABLE	170	/* 10GBASE-R FEC ability */
 #define MDIO_PCS_10GBX_STAT1	24	/* 10GBASE-X PCS status 1 */
 #define MDIO_PCS_10GBRT_STAT1	32	/* 10GBASE-R/-T PCS status 1 */
@@ -239,6 +241,9 @@
 #define MDIO_PMA_10GBR_FECABLE_ABLE	0x0001	/* FEC ability */
 #define MDIO_PMA_10GBR_FECABLE_ERRABLE	0x0002	/* FEC error indic. ability */
 
+/* PMA 10GBASE-R Fast Retrain status and control register. */
+#define MDIO_PMA_10GBR_FSRT_ENABLE	0x0001	/* Fast retrain enable */
+
 /* PCS 10GBASE-R/-T status register 1. */
 #define MDIO_PCS_10GBRT_STAT1_BLKLK	0x0001	/* Block lock attained */
 
@@ -247,6 +252,7 @@
 #define MDIO_PCS_10GBRT_STAT2_BER	0x3f00
 
 /* AN 10GBASE-T control register. */
+#define MDIO_AN_10GBT_CTRL_ADVFSRT2_5G	0x0020	/* Advertise 2.5GBASE-T fast retrain */
 #define MDIO_AN_10GBT_CTRL_ADV2_5G	0x0080	/* Advertise 2.5GBASE-T */
 #define MDIO_AN_10GBT_CTRL_ADV5G	0x0100	/* Advertise 5GBASE-T */
 #define MDIO_AN_10GBT_CTRL_ADV10G	0x1000	/* Advertise 10GBASE-T */
@@ -289,6 +295,9 @@
 #define MDIO_EEE_2_5GT		0x0001	/* 2.5GT EEE cap */
 #define MDIO_EEE_5GT		0x0002	/* 5GT EEE cap */
 
+/* AN MultiGBASE-T AN control 2 */
+#define MDIO_AN_THP_BP2_5GT	0x0008	/* 2.5GT THP bypass request */
+
 /* 2.5G/5G Extended abilities register. */
 #define MDIO_PMA_NG_EXTABLE_2_5GBT	0x0001	/* 2.5GBASET ability */
 #define MDIO_PMA_NG_EXTABLE_5GBT	0x0002	/* 5GBASET ability */
-- 
cgit v1.2.3


From 99ce45d5e7dbde399997a630f45ac9f654fa4bcc Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Tue, 26 Oct 2021 09:57:28 +0800
Subject: mctp: Implement extended addressing

This change allows an extended address struct - struct sockaddr_mctp_ext
- to be passed to sendmsg/recvmsg. This allows userspace to specify
output ifindex and physical address information (for sendmsg) or receive
the input ifindex/physaddr for incoming messages (for recvmsg). This is
typically used by userspace for MCTP address discovery and assignment
operations.

The extended addressing facility is conditional on a new sockopt:
MCTP_OPT_ADDR_EXT; userspace must explicitly enable addressing before
the kernel will consume/populate the extended address data.

Includes a fix for an uninitialised var:
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mctp.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 6acd4ccafbf7..07b0318716fc 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/netdevice.h>
 
 typedef __u8			mctp_eid_t;
 
@@ -28,6 +29,14 @@ struct sockaddr_mctp {
 	__u8			__smctp_pad1;
 };
 
+struct sockaddr_mctp_ext {
+	struct sockaddr_mctp	smctp_base;
+	int			smctp_ifindex;
+	__u8			smctp_halen;
+	__u8			__smctp_pad0[3];
+	__u8			smctp_haddr[MAX_ADDR_LEN];
+};
+
 #define MCTP_NET_ANY		0x0
 
 #define MCTP_ADDR_NULL		0x00
@@ -36,4 +45,6 @@ struct sockaddr_mctp {
 #define MCTP_TAG_MASK		0x07
 #define MCTP_TAG_OWNER		0x08
 
+#define MCTP_OPT_ADDR_EXT	1
+
 #endif /* __UAPI_MCTP_H */
-- 
cgit v1.2.3


From 8d11a4f43ef4679be0908026907a7613b33d7127 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:33 -0300
Subject: fanotify: Reserve UAPI bits for FAN_FS_ERROR

FAN_FS_ERROR allows reporting of event type FS_ERROR to userspace, which
is a mechanism to report file system wide problems via fanotify.  This
commit preallocate userspace visible bits to match the FS_ERROR event.

Link: https://lore.kernel.org/r/20211025192746.66445-19-krisman@collabora.com
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/fanotify.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 64553df9d735..2990731ddc8b 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -20,6 +20,7 @@
 #define FAN_OPEN_EXEC		0x00001000	/* File was opened for exec */
 
 #define FAN_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
+#define FAN_FS_ERROR		0x00008000	/* Filesystem error */
 
 #define FAN_OPEN_PERM		0x00010000	/* File open in perm check */
 #define FAN_ACCESS_PERM		0x00020000	/* File accessed in perm check */
-- 
cgit v1.2.3


From 130a3c742107acff985541c28360c8b40203559c Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:42 -0300
Subject: fanotify: Emit generic error info for error event

The error info is a record sent to users on FAN_FS_ERROR events
documenting the type of error.  It also carries an error count,
documenting how many errors were observed since the last reporting.

Link: https://lore.kernel.org/r/20211025192746.66445-28-krisman@collabora.com
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/fanotify.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 2990731ddc8b..bd1932c2074d 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -126,6 +126,7 @@ struct fanotify_event_metadata {
 #define FAN_EVENT_INFO_TYPE_DFID_NAME	2
 #define FAN_EVENT_INFO_TYPE_DFID	3
 #define FAN_EVENT_INFO_TYPE_PIDFD	4
+#define FAN_EVENT_INFO_TYPE_ERROR	5
 
 /* Variable length info record following event metadata */
 struct fanotify_event_info_header {
@@ -160,6 +161,12 @@ struct fanotify_event_info_pidfd {
 	__s32 pidfd;
 };
 
+struct fanotify_event_info_error {
+	struct fanotify_event_info_header hdr;
+	__s32 error;
+	__u32 error_count;
+};
+
 struct fanotify_response {
 	__s32 fd;
 	__u32 response;
-- 
cgit v1.2.3


From 2cc1ae4878282c75a569e8ec677d569601c99dda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20Wei=C3=9F?= <michael.weiss@aisec.fraunhofer.de>
Date: Sat, 4 Sep 2021 11:59:28 +0200
Subject: dm: introduce audit event module for device mapper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To be able to send auditing events to user space, we introduce a
generic dm-audit module. It provides helper functions to emit audit
events through the kernel audit subsystem. We claim the
AUDIT_DM_CTRL type=1336 and AUDIT_DM_EVENT type=1337 out of the
audit event messages range in the corresponding userspace api in
'include/uapi/linux/audit.h' for those events.

AUDIT_DM_CTRL is used to provide information about creation and
destruction of device mapper targets which are triggered by user space
admin control actions.
AUDIT_DM_EVENT is used to provide information about actual errors
during operation of the mapped device, showing e.g. integrity
violations in audit log.

Following commits to device mapper targets actually will make use of
this to emit those events in relevant cases.

The audit logs look like this if executing the following simple test:

 # dd if=/dev/zero of=test.img bs=1M count=1024
 # losetup -f test.img
 # integritysetup -vD format --integrity sha256 -t 32 /dev/loop0
 # integritysetup open -D /dev/loop0 --integrity sha256 integritytest
 # integritysetup status integritytest
 # integritysetup close integritytest
 # integritysetup open -D /dev/loop0 --integrity sha256 integritytest
 # integritysetup status integritytest
 # dd if=/dev/urandom of=/dev/loop0 bs=512 count=1 seek=100000
 # dd if=/dev/mapper/integritytest of=/dev/null

-------------------------
audit.log from auditd

type=UNKNOWN[1336] msg=audit(1630425039.363:184): module=integrity
op=ctr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1
type=UNKNOWN[1336] msg=audit(1630425039.471:185): module=integrity
op=dtr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1
type=UNKNOWN[1336] msg=audit(1630425039.611:186): module=integrity
op=ctr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1
type=UNKNOWN[1336] msg=audit(1630425054.475:187): module=integrity
op=dtr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1

type=UNKNOWN[1336] msg=audit(1630425073.171:191): module=integrity
op=ctr ppid=3807 pid=3883 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1

type=UNKNOWN[1336] msg=audit(1630425087.239:192): module=integrity
op=dtr ppid=3807 pid=3902 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1

type=UNKNOWN[1336] msg=audit(1630425093.755:193): module=integrity
op=ctr ppid=3807 pid=3906 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup"
exe="/sbin/integritysetup" subj==unconfined dev=254:3
error_msg='success' res=1

type=UNKNOWN[1337] msg=audit(1630425112.119:194): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:195): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:196): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:197): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:198): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:199): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:200): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:201): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:202): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0
type=UNKNOWN[1337] msg=audit(1630425112.119:203): module=integrity
op=integrity-checksum dev=254:3 sector=77480 res=0

Signed-off-by: Michael Weiß <michael.weiss@aisec.fraunhofer.de>
Signed-off-by: Paul Moore <paul@paul-moore.com> # fix audit.h numbering
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/uapi/linux/audit.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index daa481729e9b..809e4c2041b3 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -118,6 +118,8 @@
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
 #define AUDIT_BPF		1334	/* BPF subsystem */
 #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
+#define AUDIT_DM_CTRL		1338	/* Device Mapper target control */
+#define AUDIT_DM_EVENT		1339	/* Device Mapper events */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
-- 
cgit v1.2.3


From a390ccb316beb8ea594b8695d53926710ca454a3 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 24 Oct 2021 16:26:07 +0300
Subject: fuse: add FOPEN_NOFLUSH

Add flag returned by FUSE_OPEN and FUSE_CREATE requests to avoid flushing
data cache on close.

Different filesystems implement ->flush() is different ways:
 - Most disk filesystems do not implement ->flush() at all
 - Some network filesystem (e.g. nfs) flush local write cache of
   FMODE_WRITE file and send a "flush" command to server
 - Some network filesystem (e.g. cifs) flush local write cache of
   FMODE_WRITE file without sending an additional command to server

FUSE flushes local write cache of ANY file, even non FMODE_WRITE
and sends a "flush" command to server (if server implements it).

The FUSE implementation of ->flush() seems over agressive and
arbitrary and does not make a lot of sense when writeback caching is
disabled.

Instead of deciding on another arbitrary implementation that makes
sense, leave the choice of per-file flush behavior in the hands of
the server.

Link: https://lore.kernel.org/linux-fsdevel/CAJfpegspE8e6aKd47uZtSYX8Y-1e1FWS0VL0DH2Skb9gQP5RJQ@mail.gmail.com/
Suggested-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 36ed092227fa..a1dc3ee1d17c 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -184,6 +184,9 @@
  *
  *  7.34
  *  - add FUSE_SYNCFS
+ *
+ *  7.35
+ *  - add FOPEN_NOFLUSH
  */
 
 #ifndef _LINUX_FUSE_H
@@ -219,7 +222,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 34
+#define FUSE_KERNEL_MINOR_VERSION 35
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -290,12 +293,14 @@ struct fuse_file_lock {
  * FOPEN_NONSEEKABLE: the file is not seekable
  * FOPEN_CACHE_DIR: allow caching this directory
  * FOPEN_STREAM: the file is stream-like (no file position at all)
+ * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
 #define FOPEN_NONSEEKABLE	(1 << 2)
 #define FOPEN_CACHE_DIR		(1 << 3)
 #define FOPEN_STREAM		(1 << 4)
+#define FOPEN_NOFLUSH		(1 << 5)
 
 /**
  * INIT request/reply flags
-- 
cgit v1.2.3


From 9330986c03006ab1d33d243b7cfe598a7a3c1baa Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannekoong@fb.com>
Date: Wed, 27 Oct 2021 16:45:00 -0700
Subject: bpf: Add bloom filter map implementation

This patch adds the kernel-side changes for the implementation of
a bpf bloom filter map.

The bloom filter map supports peek (determining whether an element
is present in the map) and push (adding an element to the map)
operations.These operations are exposed to userspace applications
through the already existing syscalls in the following way:

BPF_MAP_LOOKUP_ELEM -> peek
BPF_MAP_UPDATE_ELEM -> push

The bloom filter map does not have keys, only values. In light of
this, the bloom filter map's API matches that of queue stack maps:
user applications use BPF_MAP_LOOKUP_ELEM/BPF_MAP_UPDATE_ELEM
which correspond internally to bpf_map_peek_elem/bpf_map_push_elem,
and bpf programs must use the bpf_map_peek_elem and bpf_map_push_elem
APIs to query or add an element to the bloom filter map. When the
bloom filter map is created, it must be created with a key_size of 0.

For updates, the user will pass in the element to add to the map
as the value, with a NULL key. For lookups, the user will pass in the
element to query in the map as the value, with a NULL key. In the
verifier layer, this requires us to modify the argument type of
a bloom filter's BPF_FUNC_map_peek_elem call to ARG_PTR_TO_MAP_VALUE;
as well, in the syscall layer, we need to copy over the user value
so that in bpf_map_peek_elem, we know which specific value to query.

A few things to please take note of:
 * If there are any concurrent lookups + updates, the user is
responsible for synchronizing this to ensure no false negative lookups
occur.
 * The number of hashes to use for the bloom filter is configurable from
userspace. If no number is specified, the default used will be 5 hash
functions. The benchmarks later in this patchset can help compare the
performance of using different number of hashes on different entry
sizes. In general, using more hashes decreases both the false positive
rate and the speed of a lookup.
 * Deleting an element in the bloom filter map is not supported.
 * The bloom filter map may be used as an inner map.
 * The "max_entries" size that is specified at map creation time is used
to approximate a reasonable bitmap size for the bloom filter, and is not
otherwise strictly enforced. If the user wishes to insert more entries
into the bloom filter than "max_entries", they may do so but they should
be aware that this may lead to a higher false positive rate.

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211027234504.30744-2-joannekoong@fb.com
---
 include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c10820037883..8bead4aa3ad0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_BLOOM_FILTER,
 };
 
 /* Note that tracing related programs such as
@@ -1274,6 +1275,13 @@ union bpf_attr {
 						   * struct stored as the
 						   * map value
 						   */
+		/* Any per-map-type extra fields
+		 *
+		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
+		 * number of hash functions (if 0, the bloom filter will default
+		 * to using 5 hash functions).
+		 */
+		__u64	map_extra;
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -5638,6 +5646,7 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u64 map_extra;
 } __attribute__((aligned(8)));
 
 struct bpf_btf_info {
-- 
cgit v1.2.3


From d6aef08a872b9e23eecc92d0e92393473b13c497 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 28 Oct 2021 12:04:54 +0530
Subject: bpf: Add bpf_kallsyms_lookup_name helper

This helper allows us to get the address of a kernel symbol from inside
a BPF_PROG_TYPE_SYSCALL prog (used by gen_loader), so that we can
relocate typeless ksym vars.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211028063501.2239335-2-memxor@gmail.com
---
 include/uapi/linux/bpf.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8bead4aa3ad0..bd0c9f0487f6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4923,6 +4923,21 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res)
+ *	Description
+ *		Get the address of a kernel symbol, returned in *res*. *res* is
+ *		set to 0 if the symbol is not found.
+ *	Return
+ *		On success, zero. On error, a negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-EINVAL** if string *name* is not the same size as *name_sz*.
+ *
+ *		**-ENOENT** if symbol is not found.
+ *
+ *		**-EPERM** if caller does not have permission to obtain kernel address.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5104,6 +5119,7 @@ union bpf_attr {
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
 	FN(skc_to_unix_sock),		\
+	FN(kallsyms_lookup_name),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From e77fbf990316d47968a793d8aa920fd62385aec9 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 22 Oct 2021 16:53:36 +0200
Subject: btrfs: send: prepare for v2 protocol

This is preparatory work for send protocol update to version 2 and
higher.

We have many pending protocol update requests but still don't have the
basic protocol rev in place, the first thing that must happen is to do
the actual versioning support.

The protocol version is u32 and is a new member in the send ioctl
struct. Validity of the version field is backed by a new flag bit. Old
kernels would fail when a higher version is requested. Version protocol
0 will pick the highest supported version, BTRFS_SEND_STREAM_VERSION,
  that's also exported in sysfs.

The version is still unchanged and will be increased once we have new
incompatible commands or stream updates.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index d7d3cfead056..738619994e26 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args {
  */
 #define BTRFS_SEND_FLAG_OMIT_END_CMD		0x4
 
+/*
+ * Read the protocol version in the structure
+ */
+#define BTRFS_SEND_FLAG_VERSION			0x8
+
 #define BTRFS_SEND_FLAG_MASK \
 	(BTRFS_SEND_FLAG_NO_FILE_DATA | \
 	 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
-	 BTRFS_SEND_FLAG_OMIT_END_CMD)
+	 BTRFS_SEND_FLAG_OMIT_END_CMD | \
+	 BTRFS_SEND_FLAG_VERSION)
 
 struct btrfs_ioctl_send_args {
 	__s64 send_fd;			/* in */
@@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args {
 	__u64 __user *clone_sources;	/* in */
 	__u64 parent_root;		/* in */
 	__u64 flags;			/* in */
-	__u64 reserved[4];		/* in */
+	__u32 version;			/* in */
+	__u8  reserved[28];		/* in */
 };
 
 /*
-- 
cgit v1.2.3


From cf2197ca4b8c199d188593ca6800ea1827c42171 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 29 Oct 2021 14:09:29 +0800
Subject: bcache: move uapi header bcache.h to bcache code directory

The header file include/uapi/linux/bcache.h is not really a user space
API heaer. This file defines the ondisk format of bcache internal meta
data but no one includes it from user space, bcache-tools has its own
copy of this header with minor modification.

Therefore, this patch moves include/uapi/linux/bcache.h to bcache code
directory as drivers/md/bcache/bcache_ondisk.h.

Suggested-by: Arnd Bergmann <arnd@kernel.org>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20211029060930.119923-2-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/bcache.h | 445 --------------------------------------------
 1 file changed, 445 deletions(-)
 delete mode 100644 include/uapi/linux/bcache.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
deleted file mode 100644
index 97413586195b..000000000000
--- a/include/uapi/linux/bcache.h
+++ /dev/null
@@ -1,445 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_BCACHE_H
-#define _LINUX_BCACHE_H
-
-/*
- * Bcache on disk data structures
- */
-
-#include <linux/types.h>
-
-#define BITMASK(name, type, field, offset, size)		\
-static inline __u64 name(const type *k)				\
-{ return (k->field >> offset) & ~(~0ULL << size); }		\
-								\
-static inline void SET_##name(type *k, __u64 v)			\
-{								\
-	k->field &= ~(~(~0ULL << size) << offset);		\
-	k->field |= (v & ~(~0ULL << size)) << offset;		\
-}
-
-/* Btree keys - all units are in sectors */
-
-struct bkey {
-	__u64	high;
-	__u64	low;
-	__u64	ptr[];
-};
-
-#define KEY_FIELD(name, field, offset, size)				\
-	BITMASK(name, struct bkey, field, offset, size)
-
-#define PTR_FIELD(name, offset, size)					\
-static inline __u64 name(const struct bkey *k, unsigned int i)		\
-{ return (k->ptr[i] >> offset) & ~(~0ULL << size); }			\
-									\
-static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v)	\
-{									\
-	k->ptr[i] &= ~(~(~0ULL << size) << offset);			\
-	k->ptr[i] |= (v & ~(~0ULL << size)) << offset;			\
-}
-
-#define KEY_SIZE_BITS		16
-#define KEY_MAX_U64S		8
-
-KEY_FIELD(KEY_PTRS,	high, 60, 3)
-KEY_FIELD(__PAD0,	high, 58, 2)
-KEY_FIELD(KEY_CSUM,	high, 56, 2)
-KEY_FIELD(__PAD1,	high, 55, 1)
-KEY_FIELD(KEY_DIRTY,	high, 36, 1)
-
-KEY_FIELD(KEY_SIZE,	high, 20, KEY_SIZE_BITS)
-KEY_FIELD(KEY_INODE,	high, 0,  20)
-
-/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */
-
-static inline __u64 KEY_OFFSET(const struct bkey *k)
-{
-	return k->low;
-}
-
-static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v)
-{
-	k->low = v;
-}
-
-/*
- * The high bit being set is a relic from when we used it to do binary
- * searches - it told you where a key started. It's not used anymore,
- * and can probably be safely dropped.
- */
-#define KEY(inode, offset, size)					\
-((struct bkey) {							\
-	.high = (1ULL << 63) | ((__u64) (size) << 20) | (inode),	\
-	.low = (offset)							\
-})
-
-#define ZERO_KEY			KEY(0, 0, 0)
-
-#define MAX_KEY_INODE			(~(~0 << 20))
-#define MAX_KEY_OFFSET			(~0ULL >> 1)
-#define MAX_KEY				KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)
-
-#define KEY_START(k)			(KEY_OFFSET(k) - KEY_SIZE(k))
-#define START_KEY(k)			KEY(KEY_INODE(k), KEY_START(k), 0)
-
-#define PTR_DEV_BITS			12
-
-PTR_FIELD(PTR_DEV,			51, PTR_DEV_BITS)
-PTR_FIELD(PTR_OFFSET,			8,  43)
-PTR_FIELD(PTR_GEN,			0,  8)
-
-#define PTR_CHECK_DEV			((1 << PTR_DEV_BITS) - 1)
-
-#define MAKE_PTR(gen, offset, dev)					\
-	((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
-
-/* Bkey utility code */
-
-static inline unsigned long bkey_u64s(const struct bkey *k)
-{
-	return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k);
-}
-
-static inline unsigned long bkey_bytes(const struct bkey *k)
-{
-	return bkey_u64s(k) * sizeof(__u64);
-}
-
-#define bkey_copy(_dest, _src)	memcpy(_dest, _src, bkey_bytes(_src))
-
-static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
-{
-	SET_KEY_INODE(dest, KEY_INODE(src));
-	SET_KEY_OFFSET(dest, KEY_OFFSET(src));
-}
-
-static inline struct bkey *bkey_next(const struct bkey *k)
-{
-	__u64 *d = (void *) k;
-
-	return (struct bkey *) (d + bkey_u64s(k));
-}
-
-static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
-{
-	__u64 *d = (void *) k;
-
-	return (struct bkey *) (d + nr_keys);
-}
-/* Enough for a key with 6 pointers */
-#define BKEY_PAD		8
-
-#define BKEY_PADDED(key)					\
-	union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }
-
-/* Superblock */
-
-/* Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- */
-#define BCACHE_SB_VERSION_CDEV			0
-#define BCACHE_SB_VERSION_BDEV			1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID	3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET	4
-#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES	5
-#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES	6
-#define BCACHE_SB_MAX_VERSION			6
-
-#define SB_SECTOR			8
-#define SB_OFFSET			(SB_SECTOR << SECTOR_SHIFT)
-#define SB_SIZE				4096
-#define SB_LABEL_SIZE			32
-#define SB_JOURNAL_BUCKETS		256U
-/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
-#define MAX_CACHES_PER_SET		8
-
-#define BDEV_DATA_START_DEFAULT		16	/* sectors */
-
-struct cache_sb_disk {
-	__le64			csum;
-	__le64			offset;	/* sector where this sb was written */
-	__le64			version;
-
-	__u8			magic[16];
-
-	__u8			uuid[16];
-	union {
-		__u8		set_uuid[16];
-		__le64		set_magic;
-	};
-	__u8			label[SB_LABEL_SIZE];
-
-	__le64			flags;
-	__le64			seq;
-
-	__le64			feature_compat;
-	__le64			feature_incompat;
-	__le64			feature_ro_compat;
-
-	__le64			pad[5];
-
-	union {
-	struct {
-		/* Cache devices */
-		__le64		nbuckets;	/* device size */
-
-		__le16		block_size;	/* sectors */
-		__le16		bucket_size;	/* sectors */
-
-		__le16		nr_in_set;
-		__le16		nr_this_dev;
-	};
-	struct {
-		/* Backing devices */
-		__le64		data_offset;
-
-		/*
-		 * block_size from the cache device section is still used by
-		 * backing devices, so don't add anything here until we fix
-		 * things to not need it for backing devices anymore
-		 */
-	};
-	};
-
-	__le32			last_mount;	/* time overflow in y2106 */
-
-	__le16			first_bucket;
-	union {
-		__le16		njournal_buckets;
-		__le16		keys;
-	};
-	__le64			d[SB_JOURNAL_BUCKETS];	/* journal buckets */
-	__le16			obso_bucket_size_hi;	/* obsoleted */
-};
-
-/*
- * This is for in-memory bcache super block.
- * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member
- *       size, ordering and even whole struct size may be different
- *       from cache_sb_disk.
- */
-struct cache_sb {
-	__u64			offset;	/* sector where this sb was written */
-	__u64			version;
-
-	__u8			magic[16];
-
-	__u8			uuid[16];
-	union {
-		__u8		set_uuid[16];
-		__u64		set_magic;
-	};
-	__u8			label[SB_LABEL_SIZE];
-
-	__u64			flags;
-	__u64			seq;
-
-	__u64			feature_compat;
-	__u64			feature_incompat;
-	__u64			feature_ro_compat;
-
-	union {
-	struct {
-		/* Cache devices */
-		__u64		nbuckets;	/* device size */
-
-		__u16		block_size;	/* sectors */
-		__u16		nr_in_set;
-		__u16		nr_this_dev;
-		__u32		bucket_size;	/* sectors */
-	};
-	struct {
-		/* Backing devices */
-		__u64		data_offset;
-
-		/*
-		 * block_size from the cache device section is still used by
-		 * backing devices, so don't add anything here until we fix
-		 * things to not need it for backing devices anymore
-		 */
-	};
-	};
-
-	__u32			last_mount;	/* time overflow in y2106 */
-
-	__u16			first_bucket;
-	union {
-		__u16		njournal_buckets;
-		__u16		keys;
-	};
-	__u64			d[SB_JOURNAL_BUCKETS];	/* journal buckets */
-};
-
-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
-{
-	return sb->version == BCACHE_SB_VERSION_BDEV
-		|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET
-		|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES;
-}
-
-BITMASK(CACHE_SYNC,			struct cache_sb, flags, 0, 1);
-BITMASK(CACHE_DISCARD,			struct cache_sb, flags, 1, 1);
-BITMASK(CACHE_REPLACEMENT,		struct cache_sb, flags, 2, 3);
-#define CACHE_REPLACEMENT_LRU		0U
-#define CACHE_REPLACEMENT_FIFO		1U
-#define CACHE_REPLACEMENT_RANDOM	2U
-
-BITMASK(BDEV_CACHE_MODE,		struct cache_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH		0U
-#define CACHE_MODE_WRITEBACK		1U
-#define CACHE_MODE_WRITEAROUND		2U
-#define CACHE_MODE_NONE			3U
-BITMASK(BDEV_STATE,			struct cache_sb, flags, 61, 2);
-#define BDEV_STATE_NONE			0U
-#define BDEV_STATE_CLEAN		1U
-#define BDEV_STATE_DIRTY		2U
-#define BDEV_STATE_STALE		3U
-
-/*
- * Magic numbers
- *
- * The various other data structures have their own magic numbers, which are
- * xored with the first part of the cache set's UUID
- */
-
-#define JSET_MAGIC			0x245235c1a3625032ULL
-#define PSET_MAGIC			0x6750e15f87337f91ULL
-#define BSET_MAGIC			0x90135c78b99e07f5ULL
-
-static inline __u64 jset_magic(struct cache_sb *sb)
-{
-	return sb->set_magic ^ JSET_MAGIC;
-}
-
-static inline __u64 pset_magic(struct cache_sb *sb)
-{
-	return sb->set_magic ^ PSET_MAGIC;
-}
-
-static inline __u64 bset_magic(struct cache_sb *sb)
-{
-	return sb->set_magic ^ BSET_MAGIC;
-}
-
-/*
- * Journal
- *
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
-
-#define BCACHE_JSET_VERSION_UUIDv1	1
-#define BCACHE_JSET_VERSION_UUID	1	/* Always latest UUID format */
-#define BCACHE_JSET_VERSION		1
-
-struct jset {
-	__u64			csum;
-	__u64			magic;
-	__u64			seq;
-	__u32			version;
-	__u32			keys;
-
-	__u64			last_seq;
-
-	BKEY_PADDED(uuid_bucket);
-	BKEY_PADDED(btree_root);
-	__u16			btree_level;
-	__u16			pad[3];
-
-	__u64			prio_bucket[MAX_CACHES_PER_SET];
-
-	union {
-		struct bkey	start[0];
-		__u64		d[0];
-	};
-};
-
-/* Bucket prios/gens */
-
-struct prio_set {
-	__u64			csum;
-	__u64			magic;
-	__u64			seq;
-	__u32			version;
-	__u32			pad;
-
-	__u64			next_bucket;
-
-	struct bucket_disk {
-		__u16		prio;
-		__u8		gen;
-	} __attribute((packed)) data[];
-};
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry {
-	union {
-		struct {
-			__u8	uuid[16];
-			__u8	label[32];
-			__u32	first_reg; /* time overflow in y2106 */
-			__u32	last_reg;
-			__u32	invalidated;
-
-			__u32	flags;
-			/* Size of flash only volumes */
-			__u64	sectors;
-		};
-
-		__u8		pad[128];
-	};
-};
-
-BITMASK(UUID_FLASH_ONLY,	struct uuid_entry, flags, 0, 1);
-
-/* Btree nodes */
-
-/* Version 1: Seed pointer into btree node checksum
- */
-#define BCACHE_BSET_CSUM		1
-#define BCACHE_BSET_VERSION		1
-
-/*
- * Btree nodes
- *
- * On disk a btree node is a list/log of these; within each set the keys are
- * sorted
- */
-struct bset {
-	__u64			csum;
-	__u64			magic;
-	__u64			seq;
-	__u32			version;
-	__u32			keys;
-
-	union {
-		struct bkey	start[0];
-		__u64		d[0];
-	};
-};
-
-/* OBSOLETE */
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry_v0 {
-	__u8		uuid[16];
-	__u8		label[32];
-	__u32		first_reg;
-	__u32		last_reg;
-	__u32		invalidated;
-	__u32		pad;
-};
-
-#endif /* _LINUX_BCACHE_H */
-- 
cgit v1.2.3


From 56fa95014a0447f798444e626091cbeb3176af24 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 25 Oct 2021 15:43:29 +0200
Subject: netfilter: nft_meta: add NFT_META_IFTYPE

Generalize NFT_META_IIFTYPE to NFT_META_IFTYPE which allows you to match
on the interface type of the skb->dev field. This field is used by the
netdev family to add an implicit dependency to skip non-ethernet packets
when matching on layer 3 and 4 TCP/IP header fields.

For backward compatibility, add the NFT_META_IIFTYPE alias to
NFT_META_IFTYPE.

Add __NFT_META_IIFTYPE, to be used by userspace in the future to match
specifically on the iiftype.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index e94d1fa554cb..08db4ee06ab6 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -896,7 +896,8 @@ enum nft_meta_keys {
 	NFT_META_OIF,
 	NFT_META_IIFNAME,
 	NFT_META_OIFNAME,
-	NFT_META_IIFTYPE,
+	NFT_META_IFTYPE,
+#define NFT_META_IIFTYPE	NFT_META_IFTYPE
 	NFT_META_OIFTYPE,
 	NFT_META_SKUID,
 	NFT_META_SKGID,
@@ -923,6 +924,7 @@ enum nft_meta_keys {
 	NFT_META_TIME_HOUR,
 	NFT_META_SDIF,
 	NFT_META_SDIFNAME,
+	__NFT_META_IIFTYPE,
 };
 
 /**
-- 
cgit v1.2.3


From e47be840e87ea15677bca2043ee7b696ccacf56a Mon Sep 17 00:00:00 2001
From: Wu Zongyong <wuzongyong@linux.alibaba.com>
Date: Fri, 29 Oct 2021 17:14:48 +0800
Subject: vdpa: add new attribute VDPA_ATTR_DEV_MIN_VQ_SIZE

This attribute advertises the min value of virtqueue size. The value is
1 by default.

Signed-off-by: Wu Zongyong <wuzongyong@linux.alibaba.com>
Link: https://lore.kernel.org/r/2bbc417355c4d22298050b1ba887cecfbde3e85d.1635493219.git.wuzongyong@linux.alibaba.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vdpa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index 66a41e4ec163..e3b87879514c 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -32,6 +32,7 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_VENDOR_ID,		/* u32 */
 	VDPA_ATTR_DEV_MAX_VQS,			/* u32 */
 	VDPA_ATTR_DEV_MAX_VQ_SIZE,		/* u16 */
+	VDPA_ATTR_DEV_MIN_VQ_SIZE,		/* u16 */
 
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
-- 
cgit v1.2.3


From c46b38dc8743535e686b911d253a844f0bd50ead Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 28 Oct 2021 22:15:00 +0200
Subject: netfilter: nft_payload: support for inner header matching / mangling

Allow to match and mangle on inner headers / payload data after the
transport header. There is a new field in the pktinfo structure that
stores the inner header offset which is calculated only when requested.
Only TCP and UDP supported at this stage.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 08db4ee06ab6..466fd3f4447c 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -753,11 +753,13 @@ enum nft_dynset_attributes {
  * @NFT_PAYLOAD_LL_HEADER: link layer header
  * @NFT_PAYLOAD_NETWORK_HEADER: network header
  * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
+ * @NFT_PAYLOAD_INNER_HEADER: inner header / payload
  */
 enum nft_payload_bases {
 	NFT_PAYLOAD_LL_HEADER,
 	NFT_PAYLOAD_NETWORK_HEADER,
 	NFT_PAYLOAD_TRANSPORT_HEADER,
+	NFT_PAYLOAD_INNER_HEADER,
 };
 
 /**
-- 
cgit v1.2.3


From dcce162559ee1ce5f64992c4c65197f9270e3d4f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Oct 2021 15:17:49 +0530
Subject: i2c: virtio: Add support for zero-length requests

The virtio specification received a new mandatory feature
(VIRTIO_I2C_F_ZERO_LENGTH_REQUEST) for zero length requests. Fail if the
feature isn't offered by the device.

For each read-request, set the VIRTIO_I2C_FLAGS_M_RD flag, as required
by the VIRTIO_I2C_F_ZERO_LENGTH_REQUEST feature.

This allows us to support zero length requests, like SMBUS Quick, where
the buffer need not be sent anymore.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/7c58868cd26d2fc4bd82d0d8b0dfb55636380110.1634808714.git.viresh.kumar@linaro.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jie Deng <jie.deng@intel.com> # once the spec is merged
---
 include/uapi/linux/virtio_i2c.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h
index 7c6a6fc01ad6..acf3b6069136 100644
--- a/include/uapi/linux/virtio_i2c.h
+++ b/include/uapi/linux/virtio_i2c.h
@@ -11,9 +11,15 @@
 #include <linux/const.h>
 #include <linux/types.h>
 
+/* Virtio I2C Feature bits */
+#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST	0
+
 /* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */
 #define VIRTIO_I2C_FLAGS_FAIL_NEXT	_BITUL(0)
 
+/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */
+#define VIRTIO_I2C_FLAGS_M_RD		_BITUL(1)
+
 /**
  * struct virtio_i2c_out_hdr - the virtio I2C message OUT header
  * @addr: the controlled device address
-- 
cgit v1.2.3


From ad69dd0bf26b88ec6ab26f8bbe5cd74fbed7672a Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 26 Oct 2021 20:55:13 +0300
Subject: vdpa: Introduce query of device config layout

Introduce a command to query a device config layout.

An example query of network vdpa device:

$ vdpa dev add name bar mgmtdev vdpasim_net

$ vdpa dev config show
bar: mac 00:35:09:19:48:05 link up link_announce false mtu 1500

$ vdpa dev config show -jp
{
    "config": {
        "bar": {
            "mac": "00:35:09:19:48:05",
            "link ": "up",
            "link_announce ": false,
            "mtu": 1500,
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20211026175519.87795-3-parav@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vdpa.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index e3b87879514c..a252f06f9dfd 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -17,6 +17,7 @@ enum vdpa_command {
 	VDPA_CMD_DEV_NEW,
 	VDPA_CMD_DEV_DEL,
 	VDPA_CMD_DEV_GET,		/* can dump */
+	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
 };
 
 enum vdpa_attr {
@@ -34,6 +35,11 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_MAX_VQ_SIZE,		/* u16 */
 	VDPA_ATTR_DEV_MIN_VQ_SIZE,		/* u16 */
 
+	VDPA_ATTR_DEV_NET_CFG_MACADDR,		/* binary */
+	VDPA_ATTR_DEV_NET_STATUS,		/* u8 */
+	VDPA_ATTR_DEV_NET_CFG_MAX_VQP,		/* u16 */
+	VDPA_ATTR_DEV_NET_CFG_MTU,		/* u16 */
+
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
 };
-- 
cgit v1.2.3


From b9022b53adad88fd6cf2b9718c9e498504f3e1dd Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 31 Oct 2021 16:00:02 +0000
Subject: amt: add control plane of amt interface

It adds definitions and control plane code for AMT.
this is very similar to udp tunneling interfaces such as gtp, vxlan, etc.
In the next patch, data plane code will be added.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/amt.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 include/uapi/linux/amt.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/amt.h b/include/uapi/linux/amt.h
new file mode 100644
index 000000000000..2dccff417195
--- /dev/null
+++ b/include/uapi/linux/amt.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2021 Taehee Yoo <ap420073@gmail.com>
+ */
+#ifndef _UAPI_AMT_H_
+#define _UAPI_AMT_H_
+
+enum ifla_amt_mode {
+	/* AMT interface works as Gateway mode.
+	 * The Gateway mode encapsulates IGMP/MLD traffic and decapsulates
+	 * multicast traffic.
+	 */
+	AMT_MODE_GATEWAY = 0,
+	/* AMT interface works as Relay mode.
+	 * The Relay mode encapsulates multicast traffic and decapsulates
+	 * IGMP/MLD traffic.
+	 */
+	AMT_MODE_RELAY,
+	__AMT_MODE_MAX,
+};
+
+#define AMT_MODE_MAX (__AMT_MODE_MAX - 1)
+
+enum {
+	IFLA_AMT_UNSPEC,
+	/* This attribute specify mode etier Gateway or Relay. */
+	IFLA_AMT_MODE,
+	/* This attribute specify Relay port.
+	 * AMT interface is created as Gateway mode, this attribute is used
+	 * to specify relay(remote) port.
+	 * AMT interface is created as Relay mode, this attribute is used
+	 * as local port.
+	 */
+	IFLA_AMT_RELAY_PORT,
+	/* This attribute specify Gateway port.
+	 * AMT interface is created as Gateway mode, this attribute is used
+	 * as local port.
+	 * AMT interface is created as Relay mode, this attribute is not used.
+	 */
+	IFLA_AMT_GATEWAY_PORT,
+	/* This attribute specify physical device */
+	IFLA_AMT_LINK,
+	/* This attribute specify local ip address */
+	IFLA_AMT_LOCAL_IP,
+	/* This attribute specify Relay ip address.
+	 * So, this is not used by Relay.
+	 */
+	IFLA_AMT_REMOTE_IP,
+	/* This attribute specify Discovery ip address.
+	 * When Gateway get started, it send discovery message to find the
+	 * Relay's ip address.
+	 * So, this is not used by Relay.
+	 */
+	IFLA_AMT_DISCOVERY_IP,
+	/* This attribute specify number of maximum tunnel. */
+	IFLA_AMT_MAX_TUNNELS,
+	__IFLA_AMT_MAX,
+};
+
+#define IFLA_AMT_MAX (__IFLA_AMT_MAX - 1)
+
+#endif /* _UAPI_AMT_H_ */
-- 
cgit v1.2.3


From 8845b4681bf44b9d2d2badf2c67cf476e42a86bd Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannekoong@fb.com>
Date: Fri, 29 Oct 2021 15:49:08 -0700
Subject: bpf: Add alignment padding for "map_extra" + consolidate holes

This patch makes 2 changes regarding alignment padding
for the "map_extra" field.

1) In the kernel header, "map_extra" and "btf_value_type_id"
are rearranged to consolidate the hole.

Before:
struct bpf_map {
	...
        u32		max_entries;	/*    36     4	*/
        u32		map_flags;	/*    40     4	*/

        /* XXX 4 bytes hole, try to pack */

        u64		map_extra;	/*    48     8	*/
        int		spin_lock_off;	/*    56     4	*/
        int		timer_off;	/*    60     4	*/
        /* --- cacheline 1 boundary (64 bytes) --- */
        u32		id;		/*    64     4	*/
        int		numa_node;	/*    68     4	*/
	...
        bool		frozen;		/*   117     1	*/

        /* XXX 10 bytes hole, try to pack */

        /* --- cacheline 2 boundary (128 bytes) --- */
	...
        struct work_struct	work;	/*   144    72	*/

        /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */
	struct mutex	freeze_mutex;	/*   216   144 	*/

        /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */
        u64		writecnt; 	/*   360     8	*/

    /* size: 384, cachelines: 6, members: 26 */
    /* sum members: 354, holes: 2, sum holes: 14 */
    /* padding: 16 */
    /* forced alignments: 2, forced holes: 1, sum forced holes: 10 */

} __attribute__((__aligned__(64)));

After:
struct bpf_map {
	...
        u32		max_entries;	/*    36     4	*/
        u64		map_extra;	/*    40     8 	*/
        u32		map_flags;	/*    48     4	*/
        int		spin_lock_off;	/*    52     4	*/
        int		timer_off;	/*    56     4	*/
        u32		id;		/*    60     4	*/

        /* --- cacheline 1 boundary (64 bytes) --- */
        int		numa_node;	/*    64     4	*/
	...
	bool		frozen		/*   113     1  */

        /* XXX 14 bytes hole, try to pack */

        /* --- cacheline 2 boundary (128 bytes) --- */
	...
        struct work_struct	work;	/*   144    72	*/

        /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */
        struct mutex	freeze_mutex;	/*   216   144	*/

        /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */
        u64		writecnt;       /*   360     8	*/

    /* size: 384, cachelines: 6, members: 26 */
    /* sum members: 354, holes: 1, sum holes: 14 */
    /* padding: 16 */
    /* forced alignments: 2, forced holes: 1, sum forced holes: 14 */

} __attribute__((__aligned__(64)));

2) Add alignment padding to the bpf_map_info struct
More details can be found in commit 36f9814a494a ("bpf: fix uapi hole
for 32 bit compat applications")

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20211029224909.1721024-3-joannekoong@fb.com
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bd0c9f0487f6..ba5af15e25f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5662,6 +5662,7 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u32 :32;	/* alignment pad */
 	__u64 map_extra;
 } __attribute__((aligned(8)));
 
-- 
cgit v1.2.3


From fcdb44d08a95003c3d040aecdee286156ec6f34e Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Mon, 1 Nov 2021 10:36:28 -0700
Subject: net: arp: introduce arp_evict_nocarrier sysctl parameter

This change introduces a new sysctl parameter, arp_evict_nocarrier.
When set (default) the ARP cache will be cleared on a NOCARRIER event.
This new option has been defaulted to '1' which maintains existing
behavior.

Clearing the ARP cache on NOCARRIER is relatively new, introduced by:

commit 859bd2ef1fc1110a8031b967ee656c53a6260a76
Author: David Ahern <dsahern@gmail.com>
Date:   Thu Oct 11 20:33:49 2018 -0700

    net: Evict neighbor entries on carrier down

The reason for this changes is to prevent the ARP cache from being
cleared when a wireless device roams. Specifically for wireless roams
the ARP cache should not be cleared because the underlying network has not
changed. Clearing the ARP cache in this case can introduce significant
delays sending out packets after a roam.

A user reported such a situation here:

https://lore.kernel.org/linux-wireless/CACsRnHWa47zpx3D1oDq9JYnZWniS8yBwW1h0WAVZ6vrbwL_S0w@mail.gmail.com/

After some investigation it was found that the kernel was holding onto
packets until ARP finished which resulted in this 1 second delay. It
was also found that the first ARP who-has was never responded to,
which is actually what caues the delay. This change is more or less
working around this behavior, but again, there is no reason to clear
the cache on a roam anyways.

As for the unanswered who-has, we know the packet made it OTA since
it was seen while monitoring. Why it never received a response is
unknown. In any case, since this is a problem on the AP side of things
all that can be done is to work around it until it is solved.

Some background on testing/reproducing the packet delay:

Hardware:
 - 2 access points configured for Fast BSS Transition (Though I don't
   see why regular reassociation wouldn't have the same behavior)
 - Wireless station running IWD as supplicant
 - A device on network able to respond to pings (I used one of the APs)

Procedure:
 - Connect to first AP
 - Ping once to establish an ARP entry
 - Start a tcpdump
 - Roam to second AP
 - Wait for operstate UP event, and note the timestamp
 - Start pinging

Results:

Below is the tcpdump after UP. It was recorded the interface went UP at
10:42:01.432875.

10:42:01.461871 ARP, Request who-has 192.168.254.1 tell 192.168.254.71, length 28
10:42:02.497976 ARP, Request who-has 192.168.254.1 tell 192.168.254.71, length 28
10:42:02.507162 ARP, Reply 192.168.254.1 is-at ac:86:74:55:b0:20, length 46
10:42:02.507185 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 1, length 64
10:42:02.507205 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 2, length 64
10:42:02.507212 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 3, length 64
10:42:02.507219 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 4, length 64
10:42:02.507225 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 5, length 64
10:42:02.507232 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 6, length 64
10:42:02.515373 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 1, length 64
10:42:02.521399 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 2, length 64
10:42:02.521612 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 3, length 64
10:42:02.521941 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 4, length 64
10:42:02.522419 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 5, length 64
10:42:02.523085 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 6, length 64

You can see the first ARP who-has went out very quickly after UP, but
was never responded to. Nearly a second later the kernel retries and
gets a response. Only then do the ping packets go out. If an ARP entry
is manually added prior to UP (after the cache is cleared) it is seen
that the first ping is never responded to, so its not only an issue with
ARP but with data packets in general.

As mentioned prior, the wireless interface was also monitored to verify
the ping/ARP packet made it OTA which was observed to be true.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ip.h     | 1 +
 include/uapi/linux/sysctl.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index e42d13b55cf3..e00bbb9c47bb 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -169,6 +169,7 @@ enum
 	IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
 	IPV4_DEVCONF_BC_FORWARDING,
+	IPV4_DEVCONF_ARP_EVICT_NOCARRIER,
 	__IPV4_DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 1e05d3caa712..6a3b194c50fe 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -482,6 +482,7 @@ enum
 	NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
+	NET_IPV4_CONF_ARP_EVICT_NOCARRIER=23,
 };
 
 /* /proc/sys/net/ipv4/netfilter */
-- 
cgit v1.2.3


From 18ac597af25e9760b76471524096f5b29eb820e6 Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Mon, 1 Nov 2021 10:36:29 -0700
Subject: net: ndisc: introduce ndisc_evict_nocarrier sysctl parameter

In most situations the neighbor discovery cache should be cleared on a
NOCARRIER event which is currently done unconditionally. But for wireless
roams the neighbor discovery cache can and should remain intact since
the underlying network has not changed.

This patch introduces a sysctl option ndisc_evict_nocarrier which can
be disabled by a wireless supplicant during a roam. This allows packets
to be sent after a roam immediately without having to wait for
neighbor discovery.

A user reported roughly a 1 second delay after a roam before packets
could be sent out (note, on IPv4). This delay was due to the ARP
cache being cleared. During testing of this same scenario using IPv6
no delay was noticed, but regardless there is no reason to clear
the ndisc cache for wireless roams.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index b243a53fa985..d4178dace0bf 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -193,6 +193,7 @@ enum {
 	DEVCONF_IOAM6_ENABLED,
 	DEVCONF_IOAM6_ID,
 	DEVCONF_IOAM6_ID_WIDE,
+	DEVCONF_NDISC_EVICT_NOCARRIER,
 	DEVCONF_MAX
 };
 
-- 
cgit v1.2.3


From 1aabe578dd86e9f2867c4db4fba9a15f4ba1825d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 2 Nov 2021 15:02:36 -0700
Subject: ethtool: fix ethtool msg len calculation for pause stats

ETHTOOL_A_PAUSE_STAT_MAX is the MAX attribute id,
so we need to subtract non-stats and add one to
get a count (IOW -2+1 == -1).

Otherwise we'll see:

  ethnl cmd 21: calculated reply length 40, but consumed 52

Fixes: 9a27a33027f2 ("ethtool: add standard pause stats")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool_netlink.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index ca5fbb59fa42..999777d32dcf 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -411,7 +411,9 @@ enum {
 	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
 	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
 
-	/* add new constants above here */
+	/* add new constants above here
+	 * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
+	 */
 	__ETHTOOL_A_PAUSE_STAT_CNT,
 	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
 };
-- 
cgit v1.2.3


From eff5cdd745a68863a73095b0b4d62d15e0d9d902 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Oct 2021 16:34:19 +0530
Subject: gpio: virtio: Add IRQ support

This patch adds IRQ support for the virtio GPIO driver. Note that this
uses the irq_bus_lock/unlock() callbacks, since those operations over
virtio may sleep.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 include/uapi/linux/virtio_gpio.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h
index 0445f905d8cc..d04af9c5f0de 100644
--- a/include/uapi/linux/virtio_gpio.h
+++ b/include/uapi/linux/virtio_gpio.h
@@ -5,12 +5,16 @@
 
 #include <linux/types.h>
 
+/* Virtio GPIO Feature bits */
+#define VIRTIO_GPIO_F_IRQ			0
+
 /* Virtio GPIO request types */
 #define VIRTIO_GPIO_MSG_GET_NAMES		0x0001
 #define VIRTIO_GPIO_MSG_GET_DIRECTION		0x0002
 #define VIRTIO_GPIO_MSG_SET_DIRECTION		0x0003
 #define VIRTIO_GPIO_MSG_GET_VALUE		0x0004
 #define VIRTIO_GPIO_MSG_SET_VALUE		0x0005
+#define VIRTIO_GPIO_MSG_IRQ_TYPE		0x0006
 
 /* Possible values of the status field */
 #define VIRTIO_GPIO_STATUS_OK			0x0
@@ -21,6 +25,14 @@
 #define VIRTIO_GPIO_DIRECTION_OUT		0x01
 #define VIRTIO_GPIO_DIRECTION_IN		0x02
 
+/* Virtio GPIO IRQ types */
+#define VIRTIO_GPIO_IRQ_TYPE_NONE		0x00
+#define VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING	0x01
+#define VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING	0x02
+#define VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH		0x03
+#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH		0x04
+#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW		0x08
+
 struct virtio_gpio_config {
 	__le16 ngpio;
 	__u8 padding[2];
@@ -44,4 +56,17 @@ struct virtio_gpio_response_get_names {
 	__u8 value[];
 };
 
+/* Virtio GPIO IRQ Request / Response */
+struct virtio_gpio_irq_request {
+	__le16 gpio;
+};
+
+struct virtio_gpio_irq_response {
+	__u8 status;
+};
+
+/* Possible values of the interrupt status field */
+#define VIRTIO_GPIO_IRQ_STATUS_INVALID		0x0
+#define VIRTIO_GPIO_IRQ_STATUS_VALID		0x1
+
 #endif /* _LINUX_VIRTIO_GPIO_H */
-- 
cgit v1.2.3


From 7d0003da6297eb128f3490e396e6fc6df71557cd Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 1 Nov 2021 05:11:25 -0400
Subject: virtio_gpio: drop packed attribute

Declaring the struct packed here is mostly harmless,
but gives a bad example for people to copy.
As the struct is packed and aligned manually,
let's just drop the attribute.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 include/uapi/linux/virtio_gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h
index d04af9c5f0de..d4b29d9a39dd 100644
--- a/include/uapi/linux/virtio_gpio.h
+++ b/include/uapi/linux/virtio_gpio.h
@@ -37,7 +37,7 @@ struct virtio_gpio_config {
 	__le16 ngpio;
 	__u8 padding[2];
 	__le32 gpio_names_size;
-} __packed;
+};
 
 /* Virtio GPIO Request / Response */
 struct virtio_gpio_request {
-- 
cgit v1.2.3


From aedad3e1c6ddec234b63cfb57ac231da0f680e50 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 5 Nov 2021 16:08:29 -0700
Subject: arm64: mte: change PR_MTE_TCF_NONE back into an unsigned long

This constant was previously an unsigned long, but was changed
into an int in commit 433c38f40f6a ("arm64: mte: change ASYNC and
SYNC TCF settings into bitfields"). This ended up causing spurious
unsigned-signed comparison warnings in expressions such as:

(x & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE

Therefore, change it back into an unsigned long to silence these
warnings.

Link: https://linux-review.googlesource.com/id/I07a72310db30227a5b7d789d0b817d78b657c639
Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://lore.kernel.org/r/20211105230829.2254790-1-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/uapi/linux/prctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 43bd7f713c39..de45fcd2dcbe 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -235,7 +235,7 @@ struct prctl_mm_map {
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
 /* MTE tag check fault modes */
-# define PR_MTE_TCF_NONE		0
+# define PR_MTE_TCF_NONE		0UL
 # define PR_MTE_TCF_SYNC		(1UL << 1)
 # define PR_MTE_TCF_ASYNC		(1UL << 2)
 # define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
-- 
cgit v1.2.3


From 61082ad6a6e1f999eef7e7e90046486c87933b1e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 15 Mar 2021 16:57:13 +0100
Subject: virtio-mem: support VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE

The initial virtio-mem spec states that while unplugged memory should not
be read, the device still has to allow for reading unplugged memory inside
the usable region. The primary motivation for this default handling was
to simplify bringup of virtio-mem, because there were corner cases where
Linux might have accidentially read unplugged memory inside added Linux
memory blocks.

In the meantime, we:
1. Removed /dev/kmem in commit bbcd53c96071 ("drivers/char: remove
   /dev/kmem for good")
2. Disallowed access to virtio-mem device memory via /dev/mem in
   commit 2128f4e21aa2 ("virtio-mem: disallow mapping virtio-mem memory via
   /dev/mem")
3. Sanitized access to virtio-mem device memory via /proc/kcore in
   commit 0daa322b8ff9 ("fs/proc/kcore: don't read offline sections,
   logically offline pages and hwpoisoned pages")
4. Sanitized access to virtio-mem device memory via /proc/vmcore in
   commit ce2814622e84 ("virtio-mem: kdump mode to sanitize /proc/vmcore
   access")

"Accidential" access to unplugged memory is no longer possible; we can
support the new VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE feature that will be
required by some hypervisors implementing virtio-mem in the near future.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Marek Kedzierski <mkedzier@redhat.com>
Cc: Hui Zhu <teawater@gmail.com>
Cc: Sebastien Boeuf <sebastien.boeuf@intel.com>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/uapi/linux/virtio_mem.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h
index 70e01c687d5e..e9122f1d0e0c 100644
--- a/include/uapi/linux/virtio_mem.h
+++ b/include/uapi/linux/virtio_mem.h
@@ -68,9 +68,10 @@
  * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
  *
  * There are no guarantees what will happen if unplugged memory is
- * read/written. Such memory should, in general, not be touched. E.g.,
- * even writing might succeed, but the values will simply be discarded at
- * random points in time.
+ * read/written. In general, unplugged memory should not be touched, because
+ * the resulting action is undefined. There is one exception: without
+ * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable
+ * region can be read, to simplify creation of memory dumps.
  *
  * It can happen that the device cannot process a request, because it is
  * busy. The device driver has to retry later.
@@ -87,6 +88,8 @@
 
 /* node_id is an ACPI PXM and is valid */
 #define VIRTIO_MEM_F_ACPI_PXM		0
+/* unplugged memory must not be accessed */
+#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE	1
 
 
 /* --- virtio-mem: guest -> host requests --- */
-- 
cgit v1.2.3


From b56639318bb2be66aceba92836279714488709b4 Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Thu, 21 Oct 2021 10:43:00 -0700
Subject: KVM: SEV: Add support for SEV intra host migration

For SEV to work with intra host migration, contents of the SEV info struct
such as the ASID (used to index the encryption key in the AMD SP) and
the list of memory regions need to be transferred to the target VM.
This change adds a commands for a target VMM to get a source SEV VM's sev
info.

Signed-off-by: Peter Gonda <pgonda@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Marc Orr <marcorr@google.com>
Cc: Marc Orr <marcorr@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Jim Mattson <jmattson@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Message-Id: <20211021174303.385706-3-pgonda@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 78f0719cc2a3..1daa45268de2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_BINARY_STATS_FD 203
 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 #define KVM_CAP_ARM_MTE 205
+#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From b5f57384805a34f497edb8b04d694a8a1b3d81d4 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 10 Sep 2021 15:18:20 -0400
Subject: drm/amdkfd: Add sysfs bitfields and enums to uAPI

These bits are de-facto part of the uAPI, so declare them in a uAPI header.

The corresponding bit-fields and enums in user mode are defined in
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/master/include/hsakmttypes.h

HSA_CAP_...           -> HSA_CAPABILITY
HSA_MEM_HEAP_TYPE_... -> HSA_HEAPTYPE
HSA_MEM_FLAGS_...     -> HSA_MEMORYPROPERTY
HSA_CACHE_TYPE_...    -> HsaCacheType
HSA_IOLINK_TYPE_...   -> HSA_IOLINKTYPE
HSA_IOLINK_FLAGS_...  -> HSA_LINKPROPERTY

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_sysfs.h | 108 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 include/uapi/linux/kfd_sysfs.h

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
new file mode 100644
index 000000000000..e1fb78b4bf09
--- /dev/null
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_SYSFS_H_INCLUDED
+#define KFD_SYSFS_H_INCLUDED
+
+/* Capability bits in node properties */
+#define HSA_CAP_HOT_PLUGGABLE			0x00000001
+#define HSA_CAP_ATS_PRESENT			0x00000002
+#define HSA_CAP_SHARED_WITH_GRAPHICS		0x00000004
+#define HSA_CAP_QUEUE_SIZE_POW2			0x00000008
+#define HSA_CAP_QUEUE_SIZE_32BIT		0x00000010
+#define HSA_CAP_QUEUE_IDLE_EVENT		0x00000020
+#define HSA_CAP_VA_LIMIT			0x00000040
+#define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080
+#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
+#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
+
+#define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
+#define HSA_CAP_DOORBELL_TYPE_1_0		0x1
+#define HSA_CAP_DOORBELL_TYPE_2_0		0x2
+#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
+
+/* Old buggy user mode depends on this being 0 */
+#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED	0x00080000
+
+#define HSA_CAP_MEM_EDCSUPPORTED		0x00100000
+#define HSA_CAP_RASEVENTNOTIFY			0x00200000
+#define HSA_CAP_ASIC_REVISION_MASK		0x03c00000
+#define HSA_CAP_ASIC_REVISION_SHIFT		22
+#define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000
+#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
+#define HSA_CAP_FLAGS_COHERENTHOSTACCESS	0x10000000
+#define HSA_CAP_RESERVED			0xe00f8000
+
+/* Heap types in memory properties */
+#define HSA_MEM_HEAP_TYPE_SYSTEM	0
+#define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1
+#define HSA_MEM_HEAP_TYPE_FB_PRIVATE	2
+#define HSA_MEM_HEAP_TYPE_GPU_GDS	3
+#define HSA_MEM_HEAP_TYPE_GPU_LDS	4
+#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH	5
+
+/* Flag bits in memory properties */
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE		0x00000001
+#define HSA_MEM_FLAGS_NON_VOLATILE		0x00000002
+#define HSA_MEM_FLAGS_RESERVED			0xfffffffc
+
+/* Cache types in cache properties */
+#define HSA_CACHE_TYPE_DATA		0x00000001
+#define HSA_CACHE_TYPE_INSTRUCTION	0x00000002
+#define HSA_CACHE_TYPE_CPU		0x00000004
+#define HSA_CACHE_TYPE_HSACU		0x00000008
+#define HSA_CACHE_TYPE_RESERVED		0xfffffff0
+
+/* Link types in IO link properties (matches CRAT link types) */
+#define HSA_IOLINK_TYPE_UNDEFINED	0
+#define HSA_IOLINK_TYPE_HYPERTRANSPORT	1
+#define HSA_IOLINK_TYPE_PCIEXPRESS	2
+#define HSA_IOLINK_TYPE_AMBA		3
+#define HSA_IOLINK_TYPE_MIPI		4
+#define HSA_IOLINK_TYPE_QPI_1_1	5
+#define HSA_IOLINK_TYPE_RESERVED1	6
+#define HSA_IOLINK_TYPE_RESERVED2	7
+#define HSA_IOLINK_TYPE_RAPID_IO	8
+#define HSA_IOLINK_TYPE_INFINIBAND	9
+#define HSA_IOLINK_TYPE_RESERVED3	10
+#define HSA_IOLINK_TYPE_XGMI		11
+#define HSA_IOLINK_TYPE_XGOP		12
+#define HSA_IOLINK_TYPE_GZ		13
+#define HSA_IOLINK_TYPE_ETHERNET_RDMA	14
+#define HSA_IOLINK_TYPE_RDMA_OTHER	15
+#define HSA_IOLINK_TYPE_OTHER		16
+
+/* Flag bits in IO link properties (matches CRAT flags, excluding the
+ * bi-directional flag, which is not offially part of the CRAT spec, and
+ * only used internally in KFD)
+ */
+#define HSA_IOLINK_FLAGS_ENABLED		(1 << 0)
+#define HSA_IOLINK_FLAGS_NON_COHERENT		(1 << 1)
+#define HSA_IOLINK_FLAGS_NO_ATOMICS_32_BIT	(1 << 2)
+#define HSA_IOLINK_FLAGS_NO_ATOMICS_64_BIT	(1 << 3)
+#define HSA_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA	(1 << 4)
+#define HSA_IOLINK_FLAGS_RESERVED		0xffffffe0
+
+#endif
-- 
cgit v1.2.3


From 72f6a45202f20f0e1a46b0acb7803369cc53d0b8 Mon Sep 17 00:00:00 2001
From: Xiayu Zhang <Xiayu.Zhang@mediatek.com>
Date: Wed, 1 Dec 2021 10:57:13 +0800
Subject: Fix Comment of ETH_P_802_3_MIN

The description of ETH_P_802_3_MIN is misleading.
The value of EthernetType in Ethernet II frame is more than 0x0600,
the value of Length in 802.3 frame is less than 0x0600.

Signed-off-by: Xiayu Zhang <Xiayu.Zhang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5da4ee234e0b..c0c2f3ed5729 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -117,7 +117,7 @@
 #define ETH_P_IFE	0xED3E		/* ForCES inter-FE LFB type */
 #define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
-#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is less than this value
+#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is more than this value
 					 * then the frame is Ethernet II. Else it is 802.3 */
 
 /*
-- 
cgit v1.2.3


From 9dcc38e2813e0cd3b195940c98b181ce6ede8f20 Mon Sep 17 00:00:00 2001
From: Drew DeVault <sir@cmpwn.com>
Date: Fri, 10 Dec 2021 14:46:09 -0800
Subject: Increase default MLOCK_LIMIT to 8 MiB

This limit has not been updated since 2008, when it was increased to 64
KiB at the request of GnuPG.  Until recently, the main use-cases for this
feature were (1) preventing sensitive memory from being swapped, as in
GnuPG's use-case; and (2) real-time use-cases.  In the first case, little
memory is called for, and in the second case, the user is generally in a
position to increase it if they need more.

The introduction of IOURING_REGISTER_BUFFERS adds a third use-case:
preparing fixed buffers for high-performance I/O.  This use-case will take
as much of this memory as it can get, but is still limited to 64 KiB by
default, which is very little.  This increases the limit to 8 MB, which
was chosen fairly arbitrarily as a more generous, but still conservative,
default value.

It is also possible to raise this limit in userspace.  This is easily
done, for example, in the use-case of a network daemon: systemd, for
instance, provides for this via LimitMEMLOCK in the service file; OpenRC
via the rc_ulimit variables.  However, there is no established userspace
facility for configuring this outside of daemons: end-user applications do
not presently have access to a convenient means of raising their limits.

The buck, as it were, stops with the kernel.  It's much easier to address
it here than it is to bring it to hundreds of distributions, and it can
only realistically be relied upon to be high-enough by end-user software
if it is more-or-less ubiquitous.  Most distros don't change this
particular rlimit from the kernel-supplied default value, so a change here
will easily provide that ubiquity.

Link: https://lkml.kernel.org/r/20211028080813.15966-1-sir@cmpwn.com
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Cyril Hrubis <chrubis@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Andrew Dona-Couch <andrew@donacou.ch>
Cc: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/resource.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h
index 74ef57b38f9f..ac5d6a3031db 100644
--- a/include/uapi/linux/resource.h
+++ b/include/uapi/linux/resource.h
@@ -66,10 +66,17 @@ struct rlimit64 {
 #define _STK_LIM	(8*1024*1024)
 
 /*
- * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
- * and other sensitive information are never written to disk.
+ * Limit the amount of locked memory by some sane default:
+ * root can always increase this limit if needed.
+ *
+ * The main use-cases are (1) preventing sensitive memory
+ * from being swapped; (2) real-time operations; (3) via
+ * IOURING_REGISTER_BUFFERS.
+ *
+ * The first two don't need much. The latter will take as
+ * much as it can get. 8MB is a reasonably sane default.
  */
-#define MLOCK_LIMIT	((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024)
+#define MLOCK_LIMIT	(8*1024*1024)
 
 /*
  * Due to binary compatibility, the actual resource numbers
-- 
cgit v1.2.3