From f64c4acea51fbe2c08c0b0f48b7f5d1657d7a5e4 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Fri, 10 Sep 2021 01:04:08 +0300 Subject: bpf: Add hardware timestamp field to __sk_buff BPF programs may want to know hardware timestamps if NIC supports such timestamping. Expose this data as hwtstamp field of __sk_buff the same way as gso_segs/gso_size. This field could be accessed from the same programs as tstamp field, but it's read-only field. Explicit test to deny access to padding data is added to bpf_skb_is_valid_access. Also update BPF_PROG_TEST_RUN tests of the feature. Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210909220409.8804-2-vfedorenko@novek.ru --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 791f31dd0abe..51cfd91cc387 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5284,6 +5284,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; + __u32 :32; /* Padding, future use. */ + __u64 hwtstamp; }; struct bpf_tunnel_key { -- cgit v1.2.3 From dd47c104533dedb90434a3f142e94a671ac623a6 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Mon, 13 Sep 2021 17:44:15 +0200 Subject: io-wq: provide IO_WQ_* constants for IORING_REGISTER_IOWQ_MAX_WORKERS arg items The items passed in the array pointed by the arg parameter of IORING_REGISTER_IOWQ_MAX_WORKERS io_uring_register operation carry certain semantics: they refer to different io-wq worker categories; provide IO_WQ_* constants in the UAPI, so these categories can be referenced in the user space code. Suggested-by: Jens Axboe Complements: 2e480058ddc21ec5 ("io-wq: provide a way to limit max number of workers") Signed-off-by: Eugene Syromiatnikov Link: https://lore.kernel.org/r/20210913154415.GA12890@asgard.redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 59ef35154e3d..b270a07b285e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -317,13 +317,19 @@ enum { IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, - /* set/get max number of workers */ + /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, /* this goes last */ IORING_REGISTER_LAST }; +/* io-wq worker categories */ +enum { + IO_WQ_BOUND, + IO_WQ_UNBOUND, +}; + /* deprecated, see struct io_uring_rsrc_update */ struct io_uring_files_update { __u32 offset; -- cgit v1.2.3 From 856c02dbce4f8d6a5644083db22c11750aa11481 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 10 Sep 2021 11:33:51 -0700 Subject: bpf: Introduce helper bpf_get_branch_snapshot Introduce bpf_get_branch_snapshot(), which allows tracing pogram to get branch trace from hardware (e.g. Intel LBR). To use the feature, the user need to create perf_event with proper branch_record filtering on each cpu, and then calls bpf_get_branch_snapshot in the bpf function. On Intel CPUs, VLBR event (raw event 0x1b00) can be use for this. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210910183352.3151445-3-songliubraving@fb.com --- include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 51cfd91cc387..d21326558d42 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4877,6 +4877,27 @@ union bpf_attr { * Get the struct pt_regs associated with **task**. * Return * A pointer to struct pt_regs. + * + * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) + * Description + * Get branch trace from hardware engines like Intel LBR. The + * hardware engine is stopped shortly after the helper is + * called. Therefore, the user need to filter branch entries + * based on the actual use case. To capture branch trace + * before the trigger point of the BPF program, the helper + * should be called at the beginning of the BPF program. + * + * The data is stored as struct perf_branch_entry into output + * buffer *entries*. *size* is the size of *entries* in bytes. + * *flags* is reserved for now and must be zero. + * + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-ENOENT** if architecture does not support branch records. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5055,6 +5076,7 @@ union bpf_attr { FN(get_func_ip), \ FN(get_attach_cookie), \ FN(task_pt_regs), \ + FN(get_branch_snapshot), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 099dd788e31b4f426ef49c2785069804925a84e1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 13 Sep 2021 14:51:10 -0500 Subject: cifs: remove pathname for file from SPDX header checkpatch complains about source files with filenames (e.g. in these cases just below the SPDX header in comments at the top of various files in fs/cifs). It also is helpful to change this now so will be less confusing when the parent directory is renamed e.g. from fs/cifs to fs/smb_client (or fs/smbfs) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- include/uapi/linux/cifs/cifs_mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h index 69829205fdb5..8e87d27b0951 100644 --- a/include/uapi/linux/cifs/cifs_mount.h +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ /* - * include/uapi/linux/cifs/cifs_mount.h * * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) * -- cgit v1.2.3 From b564171ade70570b7f335fa8ed17adb28409e3ac Mon Sep 17 00:00:00 2001 From: Li Li Date: Fri, 10 Sep 2021 09:42:10 -0700 Subject: binder: fix freeze race Currently cgroup freezer is used to freeze the application threads, and BINDER_FREEZE is used to freeze the corresponding binder interface. There's already a mechanism in ioctl(BINDER_FREEZE) to wait for any existing transactions to drain out before actually freezing the binder interface. But freezing an app requires 2 steps, freezing the binder interface with ioctl(BINDER_FREEZE) and then freezing the application main threads with cgroupfs. This is not an atomic operation. The following race issue might happen. 1) Binder interface is frozen by ioctl(BINDER_FREEZE); 2) Main thread A initiates a new sync binder transaction to process B; 3) Main thread A is frozen by "echo 1 > cgroup.freeze"; 4) The response from process B reaches the frozen thread, which will unexpectedly fail. This patch provides a mechanism to check if there's any new pending transaction happening between ioctl(BINDER_FREEZE) and freezing the main thread. If there's any, the main thread freezing operation can be rolled back to finish the pending transaction. Furthermore, the response might reach the binder driver before the rollback actually happens. That will still cause failed transaction. As the other process doesn't wait for another response of the response, the response transaction failure can be fixed by treating the response transaction like an oneway/async one, allowing it to reach the frozen thread. And it will be consumed when the thread gets unfrozen later. NOTE: This patch reuses the existing definition of struct binder_frozen_status_info but expands the bit assignments of __u32 member sync_recv. To ensure backward compatibility, bit 0 of sync_recv still indicates there's an outstanding sync binder transaction. This patch adds new information to bit 1 of sync_recv, indicating the binder transaction happens exactly when there's a race. If an existing userspace app runs on a new kernel, a sync binder call will set bit 0 of sync_recv so ioctl(BINDER_GET_FROZEN_INFO) still return the expected value (true). The app just doesn't check bit 1 intentionally so it doesn't have the ability to tell if there's a race. This behavior is aligned with what happens on an old kernel which doesn't set bit 1 at all. A new userspace app can 1) check bit 0 to know if there's a sync binder transaction happened when being frozen - same as before; and 2) check bit 1 to know if that sync binder transaction happened exactly when there's a race - a new information for rollback decision. the same time, confirmed the pending transactions succeeded. Fixes: 432ff1e91694 ("binder: BINDER_FREEZE ioctl") Acked-by: Todd Kjos Cc: stable Signed-off-by: Li Li Test: stress test with apps being frozen and initiating binder calls at Link: https://lore.kernel.org/r/20210910164210.2282716-2-dualli@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 20e435fe657a..3246f2c74696 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -225,7 +225,14 @@ struct binder_freeze_info { struct binder_frozen_status_info { __u32 pid; + + /* process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing + */ __u32 sync_recv; + + /* process received async transactions since last frozen */ __u32 async_recv; }; -- cgit v1.2.3 From 844f7eaaed9267ae17d33778efe65548cc940205 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Sun, 12 Sep 2021 14:22:34 +0200 Subject: include/uapi/linux/xfrm.h: Fix XFRM_MSG_MAPPING ABI breakage Commit 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") broke ABI by changing the value of the XFRM_MSG_MAPPING enum item, thus also evading the build-time check in security/selinux/nlmsgtab.c:selinux_nlmsg_lookup for presence of proper security permission checks in nlmsg_xfrm_perms. Fix it by placing XFRM_MSG_SETDEFAULT/XFRM_MSG_GETDEFAULT to the end of the enum, right before __XFRM_MSG_MAX, and updating the nlmsg_xfrm_perms accordingly. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") References: https://lore.kernel.org/netdev/20210901151402.GA2557@altlinux.org/ Signed-off-by: Eugene Syromiatnikov Acked-by: Antony Antony Acked-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index b96c1ea7166d..26f456b1f33e 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,13 +213,13 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING + XFRM_MSG_SETDEFAULT, #define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT XFRM_MSG_GETDEFAULT, #define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT - - XFRM_MSG_MAPPING, -#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) -- cgit v1.2.3 From 059ebe4fe332c5d1c25124166527cdf9fe43a3ce Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Fri, 27 Aug 2021 18:49:29 +0300 Subject: nitro_enclaves: Add fixes for checkpatch spell check reports Fix the typos in the words spelling as per the checkpatch script reports. Reviewed-by: George-Aurelian Popescu Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20210827154930.40608-7-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/nitro_enclaves.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nitro_enclaves.h b/include/uapi/linux/nitro_enclaves.h index b945073fe544..e808f5ba124d 100644 --- a/include/uapi/linux/nitro_enclaves.h +++ b/include/uapi/linux/nitro_enclaves.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. */ #ifndef _UAPI_LINUX_NITRO_ENCLAVES_H_ @@ -60,7 +60,7 @@ * * Context: Process context. * Return: - * * 0 - Logic succesfully completed. + * * 0 - Logic successfully completed. * * -1 - There was a failure in the ioctl logic. * On failure, errno is set to: * * EFAULT - copy_from_user() / copy_to_user() failure. @@ -95,7 +95,7 @@ * * Context: Process context. * Return: - * * 0 - Logic succesfully completed. + * * 0 - Logic successfully completed. * * -1 - There was a failure in the ioctl logic. * On failure, errno is set to: * * EFAULT - copy_from_user() / copy_to_user() failure. @@ -118,7 +118,7 @@ * * Context: Process context. * Return: - * * 0 - Logic succesfully completed. + * * 0 - Logic successfully completed. * * -1 - There was a failure in the ioctl logic. * On failure, errno is set to: * * EFAULT - copy_from_user() failure. @@ -161,7 +161,7 @@ * * Context: Process context. * Return: - * * 0 - Logic succesfully completed. + * * 0 - Logic successfully completed. * * -1 - There was a failure in the ioctl logic. * On failure, errno is set to: * * EFAULT - copy_from_user() / copy_to_user() failure. -- cgit v1.2.3 From fa0866625543b4d8b3d026e4e0ef5ec25a453920 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 14 Sep 2021 10:35:05 +0200 Subject: net/smc: add support for user defined EIDs SMC-Dv2 allows users to define EIDs which allows to create separate name spaces enabling users to cluster their SMC-Dv2 connections. Add support for user defined EIDs and extent the generic netlink interface so users can add, remove and dump EIDs. Signed-off-by: Karsten Graul Reviewed-by: Guvenc Gulce Signed-off-by: Guvenc Gulce Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 0f7f87c70baf..e3728af2832b 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -38,6 +38,9 @@ enum { /* SMC PNET Table commands */ #define SMC_GENL_FAMILY_VERSION 1 #define SMC_PCI_ID_STR_LEN 16 /* Max length of pci id string */ +#define SMC_MAX_HOSTNAME_LEN 32 /* Max length of the hostname */ +#define SMC_MAX_UEID 4 /* Max number of user EIDs */ +#define SMC_MAX_EID_LEN 32 /* Max length of an EID */ /* SMC_GENL_FAMILY commands */ enum { @@ -49,6 +52,10 @@ enum { SMC_NETLINK_GET_DEV_SMCR, SMC_NETLINK_GET_STATS, SMC_NETLINK_GET_FBACK_STATS, + SMC_NETLINK_DUMP_UEID, + SMC_NETLINK_ADD_UEID, + SMC_NETLINK_REMOVE_UEID, + SMC_NETLINK_FLUSH_UEID, }; /* SMC_GENL_FAMILY top level attributes */ @@ -242,4 +249,12 @@ enum { __SMC_NLA_FBACK_STATS_MAX, SMC_NLA_FBACK_STATS_MAX = __SMC_NLA_FBACK_STATS_MAX - 1 }; + +/* SMC_NETLINK_UEID attributes */ +enum { + SMC_NLA_EID_TABLE_UNSPEC, + SMC_NLA_EID_TABLE_ENTRY, /* string */ + __SMC_NLA_EID_TABLE_MAX, + SMC_NLA_EID_TABLE_MAX = __SMC_NLA_EID_TABLE_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From 3c572145c24e21c24e1cd0fd168011eaba85da8e Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 14 Sep 2021 10:35:07 +0200 Subject: net/smc: add generic netlink support for system EID With SMC-Dv2 users can configure if the static system EID should be used during CLC handshake, or if only user EIDs are allowed. Add generic netlink support to enable and disable the system EID, and to retrieve the system EID and its current enabled state. Signed-off-by: Karsten Graul Reviewed-by: Guvenc Gulce Signed-off-by: Guvenc Gulce Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index e3728af2832b..b175bd0165a1 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -56,6 +56,9 @@ enum { SMC_NETLINK_ADD_UEID, SMC_NETLINK_REMOVE_UEID, SMC_NETLINK_FLUSH_UEID, + SMC_NETLINK_DUMP_SEID, + SMC_NETLINK_ENABLE_SEID, + SMC_NETLINK_DISABLE_SEID, }; /* SMC_GENL_FAMILY top level attributes */ @@ -257,4 +260,13 @@ enum { __SMC_NLA_EID_TABLE_MAX, SMC_NLA_EID_TABLE_MAX = __SMC_NLA_EID_TABLE_MAX - 1 }; + +/* SMC_NETLINK_SEID attributes */ +enum { + SMC_NLA_SEID_UNSPEC, + SMC_NLA_SEID_ENTRY, /* string */ + SMC_NLA_SEID_ENABLED, /* u8 */ + __SMC_NLA_SEID_TABLE_MAX, + SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From 41ced4cd88020c9d4b71ff7c50d020f081efa4a0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 14 Sep 2021 15:30:09 -0700 Subject: btf: Change BTF_KIND_* macros to enums Change BTF_KIND_* macros to enums so they are encoded in dwarf and appear in vmlinux.h. This will make it easier for bpf programs to use these constants without macro definitions. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210914223009.245307-1-yhs@fb.com --- include/uapi/linux/btf.h | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index d27b1708efe9..10e401073dd1 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -56,25 +56,28 @@ struct btf_type { #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) -#define BTF_KIND_UNKN 0 /* Unknown */ -#define BTF_KIND_INT 1 /* Integer */ -#define BTF_KIND_PTR 2 /* Pointer */ -#define BTF_KIND_ARRAY 3 /* Array */ -#define BTF_KIND_STRUCT 4 /* Struct */ -#define BTF_KIND_UNION 5 /* Union */ -#define BTF_KIND_ENUM 6 /* Enumeration */ -#define BTF_KIND_FWD 7 /* Forward */ -#define BTF_KIND_TYPEDEF 8 /* Typedef */ -#define BTF_KIND_VOLATILE 9 /* Volatile */ -#define BTF_KIND_CONST 10 /* Const */ -#define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_FUNC 12 /* Function */ -#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_VAR 14 /* Variable */ -#define BTF_KIND_DATASEC 15 /* Section */ -#define BTF_KIND_FLOAT 16 /* Floating point */ -#define BTF_KIND_MAX BTF_KIND_FLOAT -#define NR_BTF_KINDS (BTF_KIND_MAX + 1) +enum { + BTF_KIND_UNKN = 0, /* Unknown */ + BTF_KIND_INT = 1, /* Integer */ + BTF_KIND_PTR = 2, /* Pointer */ + BTF_KIND_ARRAY = 3, /* Array */ + BTF_KIND_STRUCT = 4, /* Struct */ + BTF_KIND_UNION = 5, /* Union */ + BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_FWD = 7, /* Forward */ + BTF_KIND_TYPEDEF = 8, /* Typedef */ + BTF_KIND_VOLATILE = 9, /* Volatile */ + BTF_KIND_CONST = 10, /* Const */ + BTF_KIND_RESTRICT = 11, /* Restrict */ + BTF_KIND_FUNC = 12, /* Function */ + BTF_KIND_FUNC_PROTO = 13, /* Function Proto */ + BTF_KIND_VAR = 14, /* Variable */ + BTF_KIND_DATASEC = 15, /* Section */ + BTF_KIND_FLOAT = 16, /* Floating point */ + + NR_BTF_KINDS, + BTF_KIND_MAX = NR_BTF_KINDS - 1, +}; /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. -- cgit v1.2.3 From b5ea834dde6b6e7f75e51d5f66dac8cd7c97b5ef Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 14 Sep 2021 15:30:15 -0700 Subject: bpf: Support for new btf kind BTF_KIND_TAG LLVM14 added support for a new C attribute ([1]) __attribute__((btf_tag("arbitrary_str"))) This attribute will be emitted to dwarf ([2]) and pahole will convert it to BTF. Or for bpf target, this attribute will be emitted to BTF directly ([3], [4]). The attribute is intended to provide additional information for - struct/union type or struct/union member - static/global variables - static/global function or function parameter. For linux kernel, the btf_tag can be applied in various places to specify user pointer, function pre- or post- condition, function allow/deny in certain context, etc. Such information will be encoded in vmlinux BTF and can be used by verifier. The btf_tag can also be applied to bpf programs to help global verifiable functions, e.g., specifying preconditions, etc. This patch added basic parsing and checking support in kernel for new BTF_KIND_TAG kind. [1] https://reviews.llvm.org/D106614 [2] https://reviews.llvm.org/D106621 [3] https://reviews.llvm.org/D106622 [4] https://reviews.llvm.org/D109560 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210914223015.245546-1-yhs@fb.com --- include/uapi/linux/btf.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 10e401073dd1..642b6ecb37d7 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and VAR. + * FUNC, FUNC_PROTO, VAR and TAG. * "type" is a type_id referring to another type. */ union { @@ -74,6 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ + BTF_KIND_TAG = 17, /* Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -173,4 +174,15 @@ struct btf_var_secinfo { __u32 size; }; +/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe + * additional information related to the tag applied location. + * If component_idx == -1, the tag is applied to a struct, union, + * variable or function. Otherwise, it is applied to a struct/union + * member or a func argument, and component_idx indicates which member + * or argument (0 ... vlen-1). + */ +struct btf_tag { + __s32 component_idx; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From 67f1e027c27054e641584655020a417eaac9cb3a Mon Sep 17 00:00:00 2001 From: Lukas Prediger Date: Tue, 14 Sep 2021 00:09:42 +0100 Subject: drivers/cdrom: improved ioctl for media change detection The current implementation of the CDROM_MEDIA_CHANGED ioctl relies on global state, meaning that only one process can detect a disc change while the ioctl call will return 0 for other calling processes afterwards (see bug 213267). This introduces a new cdrom ioctl, CDROM_TIMED_MEDIA_CHANGE, that works by maintaining a timestamp of the last detected disc change instead of a boolean flag: Processes calling this ioctl command can provide a timestamp of the last disc change known to them and receive an indication whether the disc was changed since then and the updated timestamp. I considered fixing the buggy behavior in the original CDROM_MEDIA_CHANGED ioctl but that would require maintaining state for each calling process in the kernel, which seems like a worse solution than introducing this new ioctl. Signed-off-by: Lukas Prediger Link: https://lore.kernel.org/all/20210912191207.74449-1-lumip@lumip.de Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210913230942.1188-1-phil@philpotter.co.uk Signed-off-by: Jens Axboe --- include/uapi/linux/cdrom.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h index 6c34f6e2f1f7..804ff8d98f71 100644 --- a/include/uapi/linux/cdrom.h +++ b/include/uapi/linux/cdrom.h @@ -147,6 +147,8 @@ #define CDROM_NEXT_WRITABLE 0x5394 /* get next writable block */ #define CDROM_LAST_WRITTEN 0x5395 /* get last block written on disc */ +#define CDROM_TIMED_MEDIA_CHANGE 0x5396 /* get the timestamp of the last media change */ + /******************************************************* * CDROM IOCTL structures *******************************************************/ @@ -295,6 +297,23 @@ struct cdrom_generic_command }; }; +/* This struct is used by CDROM_TIMED_MEDIA_CHANGE */ +struct cdrom_timed_media_change_info { + __s64 last_media_change; /* Timestamp of the last detected media + * change in ms. May be set by caller, + * updated upon successful return of + * ioctl. + */ + __u64 media_flags; /* Flags returned by ioctl to indicate + * media status. + */ +}; +#define MEDIA_CHANGED_FLAG 0x1 /* Last detected media change was more + * recent than last_media_change set by + * caller. + */ +/* other bits of media_flags available for future use */ + /* * A CD-ROM physical sector size is 2048, 2052, 2056, 2324, 2332, 2336, * 2340, or 2352 bytes long. -- cgit v1.2.3 From f8d858e607b2a36808ac6d4218f5f5203d7a7d63 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 14 Sep 2021 16:46:33 +0200 Subject: xfrm: make user policy API complete >From a userland POV, this API was based on some magic values: - dirmask and action were bitfields but meaning of bits (XFRM_POL_DEFAULT_*) are not exported; - action is confusing, if a bit is set, does it mean drop or accept? Let's try to simplify this uapi by using explicit field and macros. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 26f456b1f33e..eda0426ec4c2 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -514,9 +514,12 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_INBOUND 2 struct xfrm_userpolicy_default { -#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) - __u8 dirmask; - __u8 action; +#define XFRM_USERPOLICY_UNSPEC 0 +#define XFRM_USERPOLICY_BLOCK 1 +#define XFRM_USERPOLICY_ACCEPT 2 + __u8 in; + __u8 fwd; + __u8 out; }; #ifndef __KERNEL__ -- cgit v1.2.3 From 336562752acc1a723f9a24b5b8129ae22e0478c6 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 15 Sep 2021 01:54:00 +0200 Subject: bpf: Update bpf_get_smp_processor_id() documentation BPF programs run with migration disabled regardless of preemption, as they are protected by migrate_disable(). Update the uapi documentation accordingly. Signed-off-by: Matteo Croce Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210914235400.59427-1-mcroce@linux.microsoft.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d21326558d42..3e9785f1064a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1629,7 +1629,7 @@ union bpf_attr { * u32 bpf_get_smp_processor_id(void) * Description * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the + * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. * Return -- cgit v1.2.3 From 227b9644ab16d2ecd98d593edbe15c32c0c9620a Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 16 Sep 2021 11:37:38 +0800 Subject: net/tls: support SM4 GCM/CCM algorithm The RFC8998 specification defines the use of the ShangMi algorithm cipher suites in TLS 1.3, and also supports the GCM/CCM mode using the SM4 algorithm. Signed-off-by: Tianjia Zhang Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/tls.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 0d54baea1d8d..5f38be0ec0f3 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -84,6 +84,20 @@ #define TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE 16 #define TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE 8 +#define TLS_CIPHER_SM4_GCM 55 +#define TLS_CIPHER_SM4_GCM_IV_SIZE 8 +#define TLS_CIPHER_SM4_GCM_KEY_SIZE 16 +#define TLS_CIPHER_SM4_GCM_SALT_SIZE 4 +#define TLS_CIPHER_SM4_GCM_TAG_SIZE 16 +#define TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE 8 + +#define TLS_CIPHER_SM4_CCM 56 +#define TLS_CIPHER_SM4_CCM_IV_SIZE 8 +#define TLS_CIPHER_SM4_CCM_KEY_SIZE 16 +#define TLS_CIPHER_SM4_CCM_SALT_SIZE 4 +#define TLS_CIPHER_SM4_CCM_TAG_SIZE 16 +#define TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD_TYPE 2 @@ -124,6 +138,22 @@ struct tls12_crypto_info_chacha20_poly1305 { unsigned char rec_seq[TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_sm4_gcm { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_SM4_GCM_IV_SIZE]; + unsigned char key[TLS_CIPHER_SM4_GCM_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_SM4_GCM_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE]; +}; + +struct tls12_crypto_info_sm4_ccm { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_SM4_CCM_IV_SIZE]; + unsigned char key[TLS_CIPHER_SM4_CCM_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_SM4_CCM_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE]; +}; + enum { TLS_INFO_UNSPEC, TLS_INFO_VERSION, -- cgit v1.2.3 From 10aceb629e198429c849d5e995c3bb1ba7a9aaa3 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 17 Sep 2021 11:29:05 -0700 Subject: bpf: Add bpf_trace_vprintk helper This helper is meant to be "bpf_trace_printk, but with proper vararg support". Follow bpf_snprintf's example and take a u64 pseudo-vararg array. Write to /sys/kernel/debug/tracing/trace_pipe using the same mechanism as bpf_trace_printk. The functionality of this helper was requested in the libbpf issue tracker [0]. [0] Closes: https://github.com/libbpf/libbpf/issues/315 Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210917182911.2426606-4-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3e9785f1064a..98ca79a67937 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4898,6 +4898,16 @@ union bpf_attr { * **-EINVAL** if *flags* is not zero. * * **-ENOENT** if architecture does not support branch records. + * + * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 + * to format and can handle more format args as a result. + * + * Arguments are to be used as in **bpf_seq_printf**\ () helper. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5077,6 +5087,7 @@ union bpf_attr { FN(get_attach_cookie), \ FN(task_pt_regs), \ FN(get_branch_snapshot), \ + FN(trace_vprintk), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From a42effb0b24fcaf49513c2d7d77ef6daa9e32a6f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 17 Sep 2021 11:29:11 -0700 Subject: bpf: Clarify data_len param in bpf_snprintf and bpf_seq_printf comments Since the data_len in these two functions is a byte len of the preceding u64 *data array, it must always be a multiple of 8. If this isn't the case both helpers error out, so let's make the requirement explicit so users don't need to infer it. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210917182911.2426606-10-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 98ca79a67937..6fc59d61937a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4046,7 +4046,7 @@ union bpf_attr { * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes. + * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -4751,7 +4751,8 @@ union bpf_attr { * Each format specifier in **fmt** corresponds to one u64 element * in the **data** array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* - * array. The *data_len* is the size of *data* in bytes. + * array. The *data_len* is the size of *data* in bytes - must be + * a multiple of 8. * * Formats **%s** and **%p{i,I}{4,6}** require to read kernel * memory. Reading kernel memory may fail due to either invalid -- cgit v1.2.3 From 55c42fa7fa331f98062c32799456420930b8bf8c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Sep 2021 16:33:19 -0700 Subject: mptcp: add MPTCP_INFO getsockopt Its not compatible with multipath-tcp.org kernel one. 1. The out-of-tree implementation defines a different 'struct mptcp_info', with embedded __user addresses for additional data such as endpoint addresses. 2. Mat Martineau points out that embedded __user addresses doesn't work with BPF_CGROUP_RUN_PROG_GETSOCKOPT() which assumes that copying in optsize bytes from optval provides all data that got copied to userspace. This provides mptcp_info data for the given mptcp socket. Userspace sets optlen to the size of the structure it expects. The kernel updates it to contain the number of bytes that it copied. This allows to append more information to the structure later. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index f66038b9551f..3e9caeddda7e 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -193,4 +193,7 @@ enum mptcp_event_attr { #define MPTCP_RST_EBADPERF 5 #define MPTCP_RST_EMIDDLEBOX 6 +/* MPTCP socket options */ +#define MPTCP_INFO 1 + #endif /* _UAPI_MPTCP_H */ -- cgit v1.2.3 From 06f15cee369535a383c9c82ed37a25f0a413f6f1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Sep 2021 16:33:20 -0700 Subject: mptcp: add MPTCP_TCPINFO getsockopt support Allow users to retrieve TCP_INFO data of all subflows. Users need to pre-initialize a meta header that has to be prepended to the data buffer that will be filled with the tcp info data. The meta header looks like this: struct mptcp_subflow_data { __u32 size_subflow_data;/* size of this structure in userspace */ __u32 num_subflows; /* must be 0, set by kernel */ __u32 size_kernel; /* must be 0, set by kernel */ __u32 size_user; /* size of one element in data[] */ } __attribute__((aligned(8))); size_subflow_data has to be set to 'sizeof(struct mptcp_subflow_data)'. This allows to extend mptcp_subflow_data structure later on without breaking backwards compatibility. If the structure is extended later on, kernel knows where the userspace-provided meta header ends, even if userspace uses an older (smaller) version of the structure. num_subflows must be set to 0. If the getsockopt request succeeds (return value is 0), it will be updated to contain the number of active subflows for the given logical connection. size_kernel must be set to 0. If the getsockopt request is successful, it will contain the size of the 'struct tcp_info' as known by the kernel. This is informational only. size_user must be set to 'sizeof(struct tcp_info)'. This allows the kernel to only fill in the space reserved/expected by userspace. Example: struct my_tcp_info { struct mptcp_subflow_data d; struct tcp_info ti[2]; }; struct my_tcp_info ti; socklen_t olen; memset(&ti, 0, sizeof(ti)); ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data); ti.d.size_user = sizeof(struct tcp_info); olen = sizeof(ti); ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen); if (ret < 0) die_perror("getsockopt MPTCP_TCPINFO"); mptcp_subflow_data.num_subflows is populated with the number of subflows that exist on the kernel side for the logical mptcp connection. This allows userspace to re-try with a larger tcp_info array if the number of subflows was larger than the available space in the ti[] array. olen has to be set to the number of bytes that userspace has allocated to receive the kernel data. It will be updated to contain the real number bytes that have been copied to by the kernel. In the above example, if the number if subflows was 1, olen is equal to 'sizeof(struct mptcp_subflow_data) + sizeof(struct tcp_info). For 2 or more subflows olen is equal to 'sizeof(struct my_tcp_info)'. If there was more data that could not be copied due to lack of space in the option buffer, userspace can detect this by checking mptcp_subflow_data->num_subflows. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 3e9caeddda7e..3f013a513770 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -193,7 +193,15 @@ enum mptcp_event_attr { #define MPTCP_RST_EBADPERF 5 #define MPTCP_RST_EMIDDLEBOX 6 +struct mptcp_subflow_data { + __u32 size_subflow_data; /* size of this structure in userspace */ + __u32 num_subflows; /* must be 0, set by kernel */ + __u32 size_kernel; /* must be 0, set by kernel */ + __u32 size_user; /* size of one element in data[] */ +} __attribute__((aligned(8))); + /* MPTCP socket options */ -#define MPTCP_INFO 1 +#define MPTCP_INFO 1 +#define MPTCP_TCPINFO 2 #endif /* _UAPI_MPTCP_H */ -- cgit v1.2.3 From c11c5906bc0aba62a78da69035f6b30c6da6d13b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Sep 2021 16:33:21 -0700 Subject: mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support This retrieves the address pairs of all subflows currently active for a given mptcp connection. It re-uses the same meta-header as for MPTCP_TCPINFO. A new structure is provided to hold the subflow address data: struct mptcp_subflow_addrs { union { __kernel_sa_family_t sa_family; struct sockaddr sa_local; struct sockaddr_in sin_local; struct sockaddr_in6 sin6_local; struct sockaddr_storage ss_local; }; union { struct sockaddr sa_remote; struct sockaddr_in sin_remote; struct sockaddr_in6 sin6_remote; struct sockaddr_storage ss_remote; }; }; Usage of the new getsockopt is very similar to MPTCP_TCPINFO one. Userspace allocates a 'struct mptcp_subflow_data', followed by one or more 'struct mptcp_subflow_addrs', then inits the mptcp_subflow_data structure as follows: struct mptcp_subflow_addrs *sf_addr; struct mptcp_subflow_data *addr; socklen_t olen = sizeof(*addr) + (8 * sizeof(*sf_addr)); addr = malloc(olen); addr->size_subflow_data = sizeof(*addr); addr->num_subflows = 0; addr->size_kernel = 0; addr->size_user = sizeof(struct mptcp_subflow_addrs); sf_addr = (struct mptcp_subflow_addrs *)(addr + 1); and then retrieves the endpoint addresses via: ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, addr, &olen); If the call succeeds, kernel will have added up to 8 endpoint addresses after the 'mptcp_subflow_data' header. Userspace needs to re-check 'olen' value to detect how many bytes have been filled in by the kernel. Userspace can check addr->num_subflows to discover when there were more subflows that available data space. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 3f013a513770..c8cc46f80a16 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -4,6 +4,13 @@ #include #include +#include /* for sockaddr_in */ +#include /* for sockaddr_in6 */ +#include /* for sockaddr_storage and sa_family */ + +#ifndef __KERNEL__ +#include /* for struct sockaddr */ +#endif #define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) #define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) @@ -200,8 +207,25 @@ struct mptcp_subflow_data { __u32 size_user; /* size of one element in data[] */ } __attribute__((aligned(8))); +struct mptcp_subflow_addrs { + union { + __kernel_sa_family_t sa_family; + struct sockaddr sa_local; + struct sockaddr_in sin_local; + struct sockaddr_in6 sin6_local; + struct __kernel_sockaddr_storage ss_local; + }; + union { + struct sockaddr sa_remote; + struct sockaddr_in sin_remote; + struct sockaddr_in6 sin6_remote; + struct __kernel_sockaddr_storage ss_remote; + }; +}; + /* MPTCP socket options */ #define MPTCP_INFO 1 #define MPTCP_TCPINFO 2 +#define MPTCP_SUBFLOW_ADDRS 3 #endif /* _UAPI_MPTCP_H */ -- cgit v1.2.3 From 5bd2182d58e9d9c6279b7a8a2f9b41add0e7f9cb Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 16 Feb 2021 19:46:48 -0500 Subject: audit,io_uring,io-wq: add some basic audit support to io_uring This patch adds basic auditing to io_uring operations, regardless of their context. This is accomplished by allocating audit_context structures for the io-wq worker and io_uring SQPOLL kernel threads as well as explicitly auditing the io_uring operations in io_issue_sqe(). Individual io_uring operations can bypass auditing through the "audit_skip" field in the struct io_op_def definition for the operation; although great care must be taken so that security relevant io_uring operations do not bypass auditing; please contact the audit mailing list (see the MAINTAINERS file) with any questions. The io_uring operations are audited using a new AUDIT_URINGOP record, an example is shown below: type=UNKNOWN[1336] msg=audit(1631800225.981:37289): uring_op=19 success=yes exit=0 items=0 ppid=15454 pid=15681 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) Thanks to Richard Guy Briggs for review and feedback. Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index daa481729e9b..a1997697c8b1 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -118,6 +118,7 @@ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ +#define AUDIT_URINGOP 1336 /* io_uring operation */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.3 From 67daf270cebcf7aab4b3292b36f9adf357b23ddc Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sun, 18 Apr 2021 21:54:47 -0400 Subject: audit: add filtering for io_uring records This patch adds basic audit io_uring filtering, using as much of the existing audit filtering infrastructure as possible. In order to do this we reuse the audit filter rule's syscall mask for the io_uring operation and we create a new filter for io_uring operations as AUDIT_FILTER_URING_EXIT/audit_filter_list[7]. Thanks to Richard Guy Briggs for his review, feedback, and work on the corresponding audit userspace changes. Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index a1997697c8b1..ecf1edd2affa 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -167,8 +167,9 @@ #define AUDIT_FILTER_EXCLUDE 0x05 /* Apply rule before record creation */ #define AUDIT_FILTER_TYPE AUDIT_FILTER_EXCLUDE /* obsolete misleading naming */ #define AUDIT_FILTER_FS 0x06 /* Apply rule at __audit_inode_child */ +#define AUDIT_FILTER_URING_EXIT 0x07 /* Apply rule at io_uring op exit */ -#define AUDIT_NR_FILTERS 7 +#define AUDIT_NR_FILTERS 8 #define AUDIT_FILTER_PREPEND 0x10 /* Prepend to front of list */ -- cgit v1.2.3 From 50d7bd38c3aafc4749e05e8d7fcb616979143602 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 May 2021 20:01:15 -0700 Subject: stddef: Introduce struct_group() helper macro Kernel code has a regular need to describe groups of members within a structure usually when they need to be copied or initialized separately from the rest of the surrounding structure. The generally accepted design pattern in C is to use a named sub-struct: struct foo { int one; struct { int two; int three, four; } thing; int five; }; This would allow for traditional references and sizing: memcpy(&dst.thing, &src.thing, sizeof(dst.thing)); However, doing this would mean that referencing struct members enclosed by such named structs would always require including the sub-struct name in identifiers: do_something(dst.thing.three); This has tended to be quite inflexible, especially when such groupings need to be added to established code which causes huge naming churn. Three workarounds exist in the kernel for this problem, and each have other negative properties. To avoid the naming churn, there is a design pattern of adding macro aliases for the named struct: #define f_three thing.three This ends up polluting the global namespace, and makes it difficult to search for identifiers. Another common work-around in kernel code avoids the pollution by avoiding the named struct entirely, instead identifying the group's boundaries using either a pair of empty anonymous structs of a pair of zero-element arrays: struct foo { int one; struct { } start; int two; int three, four; struct { } finish; int five; }; struct foo { int one; int start[0]; int two; int three, four; int finish[0]; int five; }; This allows code to avoid needing to use a sub-struct named for member references within the surrounding structure, but loses the benefits of being able to actually use such a struct, making it rather fragile. Using these requires open-coded calculation of sizes and offsets. The efforts made to avoid common mistakes include lots of comments, or adding various BUILD_BUG_ON()s. Such code is left with no way for the compiler to reason about the boundaries (e.g. the "start" object looks like it's 0 bytes in length), making bounds checking depend on open-coded calculations: if (length > offsetof(struct foo, finish) - offsetof(struct foo, start)) return -EINVAL; memcpy(&dst.start, &src.start, offsetof(struct foo, finish) - offsetof(struct foo, start)); However, the vast majority of places in the kernel that operate on groups of members do so without any identification of the grouping, relying either on comments or implicit knowledge of the struct contents, which is even harder for the compiler to reason about, and results in even more fragile manual sizing, usually depending on member locations outside of the region (e.g. to copy "two" and "three", use the start of "four" to find the size): BUILD_BUG_ON((offsetof(struct foo, four) < offsetof(struct foo, two)) || (offsetof(struct foo, four) < offsetof(struct foo, three)); if (length > offsetof(struct foo, four) - offsetof(struct foo, two)) return -EINVAL; memcpy(&dst.two, &src.two, length); In order to have a regular programmatic way to describe a struct region that can be used for references and sizing, can be examined for bounds checking, avoids forcing the use of intermediate identifiers, and avoids polluting the global namespace, introduce the struct_group() macro. This macro wraps the member declarations to create an anonymous union of an anonymous struct (no intermediate name) and a named struct (for references and sizing): struct foo { int one; struct_group(thing, int two; int three, four; ); int five; }; if (length > sizeof(src.thing)) return -EINVAL; memcpy(&dst.thing, &src.thing, length); do_something(dst.three); There are some rare cases where the resulting struct_group() needs attributes added, so struct_group_attr() is also introduced to allow for specifying struct attributes (e.g. __align(x) or __packed). Additionally, there are places where such declarations would like to have the struct be tagged, so struct_group_tagged() is added. Given there is a need for a handful of UAPI uses too, the underlying __struct_group() macro has been defined in UAPI so it can be used there too. To avoid confusing scripts/kernel-doc, hide the macro from its struct parsing. Co-developed-by: Keith Packard Signed-off-by: Keith Packard Acked-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20210728023217.GC35706@embeddedor Enhanced-by: Rasmus Villemoes Link: https://lore.kernel.org/lkml/41183a98-bdb9-4ad6-7eab-5a7292a6df84@rasmusvillemoes.dk Enhanced-by: Dan Williams Link: https://lore.kernel.org/lkml/1d9a2e6df2a9a35b2cdd50a9a68cac5991e7e5f0.camel@intel.com Enhanced-by: Daniel Vetter Link: https://lore.kernel.org/lkml/YQKa76A6XuFqgM03@phenom.ffwll.local Acked-by: Dan Williams Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index ee8220f8dcf5..610204f7c275 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -4,3 +4,24 @@ #ifndef __always_inline #define __always_inline inline #endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } -- cgit v1.2.3 From e306784a8de08868d0ecbf78dd42a0051d0e14ce Mon Sep 17 00:00:00 2001 From: Subrat Mishra Date: Wed, 15 Sep 2021 11:22:23 +0530 Subject: cfg80211: AP mode driver offload for FILS association crypto Add a driver FILS crypto offload extended capability flag to indicate that the driver running in AP mode is capable of handling encryption and decryption of (Re)Association request and response frames. Add a command to set FILS AAD data to driver. This feature is supported on drivers running in AP mode only. This extended capability is exchanged with hostapd during cfg80211 init. If the driver indicates this capability, then before sending the Authentication response frame, hostapd sets FILS AAD data to the driver. This allows the driver to decrypt (Re)Association Request frame and encrypt (Re)Association Response frame. FILS Key derivation will still be done in hostapd. Signed-off-by: Subrat Mishra Link: https://lore.kernel.org/r/1631685143-13530-1-git-send-email-subratm@codeaurora.org [fix whitespace] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c2efea98e060..e89bbf856228 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -300,6 +300,29 @@ * the interface goes down. */ +/** + * DOC: FILS shared key crypto offload + * + * This feature is applicable to drivers running in AP mode. + * + * FILS shared key crypto offload can be advertised by drivers by setting + * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD flag. The drivers that support + * FILS shared key crypto offload should be able to encrypt and decrypt + * association frames for FILS shared key authentication as per IEEE 802.11ai. + * With this capability, for FILS key derivation, drivers depend on userspace. + * + * After FILS key derivation, userspace shares the FILS AAD details with the + * driver and the driver stores the same to use in decryption of association + * request and in encryption of association response. The below parameters + * should be given to the driver in %NL80211_CMD_SET_FILS_AAD. + * %NL80211_ATTR_MAC - STA MAC address, used for storing FILS AAD per STA + * %NL80211_ATTR_FILS_KEK - Used for encryption or decryption + * %NL80211_ATTR_FILS_NONCES - Used for encryption or decryption + * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) + * + * Once the association is done, the driver cleans the FILS AAD data. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1200,6 +1223,12 @@ * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change * has completed * + * @NL80211_CMD_SET_FILS_AAD: Set FILS AAD data to the driver using - + * &NL80211_ATTR_MAC - for STA MAC address + * &NL80211_ATTR_FILS_KEK - for KEK + * &NL80211_ATTR_FILS_NONCES - for FILS Nonces + * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1440,6 +1469,8 @@ enum nl80211_commands { NL80211_CMD_COLOR_CHANGE_ABORTED, NL80211_CMD_COLOR_CHANGE_COMPLETED, + NL80211_CMD_SET_FILS_AAD, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -5995,6 +6026,11 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision * detection and change announcemnts. * + * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD: Driver running in AP mode supports + * FILS encryption and decryption for (Re)Association Request and Response + * frames. Userspace has to share FILS AAD details to the driver by using + * @NL80211_CMD_SET_FILS_AAD. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6060,6 +6096,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SECURE_RTT, NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, + NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From dc1e3cb8da8b414b37208b2fb6755fef8122504b Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 15 Sep 2021 19:54:34 -0700 Subject: nl80211: MBSSID and EMA support in AP mode Add new attributes to configure support for multiple BSSID and advanced multi-BSSID advertisements (EMA) in AP mode. - NL80211_ATTR_MBSSID_CONFIG used for per interface configuration. - NL80211_ATTR_MBSSID_ELEMS used to MBSSID elements for beacons. Memory for the elements is allocated dynamically. This change frees the memory in existing functions which call nl80211_parse_beacon(), a comment is added to indicate the new references to do the same. Signed-off-by: John Crispin Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20210916025437.29138-2-alokad@codeaurora.org [don't leave ERR_PTR hanging around] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 76 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e89bbf856228..eda608b1eb09 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -360,7 +360,10 @@ * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from * userspace to request deletion of a virtual interface, then requires - * attribute %NL80211_ATTR_IFINDEX. + * attribute %NL80211_ATTR_IFINDEX. If multiple BSSID advertisements are + * enabled using %NL80211_ATTR_MBSSID_CONFIG, %NL80211_ATTR_MBSSID_ELEMS, + * and if this command is used for the transmitting interface, then all + * the non-transmitting interfaces are deleted as well. * * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. @@ -2624,6 +2627,18 @@ enum nl80211_commands { * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE * information for the time while performing a color switch. * + * @NL80211_ATTR_MBSSID_CONFIG: Nested attribute for multiple BSSID + * advertisements (MBSSID) parameters in AP mode. + * Kernel uses this attribute to indicate the driver's support for MBSSID + * and enhanced multi-BSSID advertisements (EMA AP) to the userspace. + * Userspace should use this attribute to configure per interface MBSSID + * parameters. + * See &enum nl80211_mbssid_config_attributes for details. + * + * @NL80211_ATTR_MBSSID_ELEMS: Nested parameter to pass multiple BSSID elements. + * Mandatory parameter for the transmitting interface to enable MBSSID. + * Optional for the non-transmitting interfaces. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3127,6 +3142,9 @@ enum nl80211_attrs { NL80211_ATTR_COLOR_CHANGE_COLOR, NL80211_ATTR_COLOR_CHANGE_ELEMS, + NL80211_ATTR_MBSSID_CONFIG, + NL80211_ATTR_MBSSID_ELEMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -7386,4 +7404,60 @@ enum nl80211_sar_specs_attrs { NL80211_SAR_ATTR_SPECS_MAX = __NL80211_SAR_ATTR_SPECS_LAST - 1, }; +/** + * enum nl80211_mbssid_config_attributes - multiple BSSID (MBSSID) and enhanced + * multi-BSSID advertisements (EMA) in AP mode. + * Kernel uses some of these attributes to advertise driver's support for + * MBSSID and EMA. + * Remaining attributes should be used by the userspace to configure the + * features. + * + * @__NL80211_MBSSID_CONFIG_ATTR_INVALID: Invalid + * + * @NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES: Used by the kernel to advertise + * the maximum number of MBSSID interfaces supported by the driver. + * Driver should indicate MBSSID support by setting + * wiphy->mbssid_max_interfaces to a value more than or equal to 2. + * + * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel + * to advertise the maximum profile periodicity supported by the driver + * if EMA is enabled. Driver should indicate EMA support to the userspace + * by setting wiphy->mbssid_max_ema_profile_periodicity to + * a non-zero value. + * + * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of + * this BSS (u8) in the multiple BSSID set. + * Value must be set to 0 for the transmitting interface and non-zero for + * all non-transmitting interfaces. The userspace will be responsible + * for using unique indices for the interfaces. + * Range: 0 to wiphy->mbssid_max_interfaces-1. + * + * @NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX: Mandatory parameter for + * a non-transmitted profile which provides the interface index (u32) of + * the transmitted profile. The value must match one of the interface + * indices advertised by the kernel. Optional if the interface being set up + * is the transmitting one, however, if provided then the value must match + * the interface index of the same. + * + * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature. + * Setting this flag is permitted only if the driver advertises EMA support + * by setting wiphy->mbssid_max_ema_profile_periodicity to non-zero. + * + * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal + * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute + */ +enum nl80211_mbssid_config_attributes { + __NL80211_MBSSID_CONFIG_ATTR_INVALID, + + NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, + NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, + NL80211_MBSSID_CONFIG_ATTR_INDEX, + NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX, + NL80211_MBSSID_CONFIG_ATTR_EMA, + + /* keep last */ + __NL80211_MBSSID_CONFIG_ATTR_LAST, + NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 61bc346ce64a3864ac55f5d18bdc1572cda4fb18 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 25 Aug 2021 19:06:13 +0200 Subject: uapi/linux/prctl: provide macro definitions for the PR_SCHED_CORE type argument Commit 7ac592aa35a684ff ("sched: prctl() core-scheduling interface") made use of enum pid_type in prctl's arg4; this type and the associated enumeration definitions are not exposed to userspace. Christian has suggested to provide additional macro definitions that convey the meaning of the type argument more in alignment with its actual usage, and this patch does exactly that. Link: https://lore.kernel.org/r/20210825170613.GA3884@asgard.redhat.com Suggested-by: Christian Brauner Acked-by: Christian Brauner Signed-off-by: Eugene Syromiatnikov Complements: 7ac592aa35a684ff ("sched: prctl() core-scheduling interface") Signed-off-by: Christian Brauner --- include/uapi/linux/prctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43bd7f713c39..b2e4dc1449b9 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -268,5 +268,8 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ # define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ # define PR_SCHED_CORE_MAX 4 +# define PR_SCHED_CORE_SCOPE_THREAD 0 +# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 +# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From b84f60a307f09debe30cc171b0f0a5c36797cf67 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 5 Aug 2021 04:47:49 +0200 Subject: media: Rename V4L2_PIX_FMT_SUNXI_TILED_NV12 to V4L2_PIX_FMT_NV12_32L32 The V4L2_PIX_FMT_SUNXI_TILED_NV12 format is actually a fairly common NV12 tiled format, with 32x32 linear tiles. Rename the format and move its documentation together with the other tiled NV12 formats. Keep V4L2_PIX_FMT_SUNXI_TILED_NV12 for application compatibility. Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9260791b8438..0188cd39468f 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -627,6 +627,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV444M v4l2_fourcc('Y', 'M', '2', '4') /* 24 YUV444 planar */ #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24 YVU444 planar */ +/* Tiled YUV formats */ +#define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ + /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ #define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G', 'B', 'R', 'G') /* 8 GBGB.. RGRG.. */ @@ -734,7 +737,6 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ #define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ -#define V4L2_PIX_FMT_SUNXI_TILED_NV12 v4l2_fourcc('S', 'T', '1', '2') /* Sunxi Tiled NV12 Format */ #define V4L2_PIX_FMT_CNF4 v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */ @@ -2615,4 +2617,9 @@ struct v4l2_create_buffers { #define BASE_VIDIOC_PRIVATE 192 /* 192-255 are private */ +/* Deprecated definitions kept for backwards compatibility */ +#ifndef __KERNEL__ +#define V4L2_PIX_FMT_SUNXI_TILED_NV12 V4L2_PIX_FMT_NV12_32L32 +#endif + #endif /* _UAPI__LINUX_VIDEODEV2_H */ -- cgit v1.2.3 From 78eee7b5f110c9884c8ffd1dfcdd9c29296f3e43 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 5 Aug 2021 04:47:50 +0200 Subject: media: Rename V4L2_PIX_FMT_HM12 to V4L2_PIX_FMT_NV12_16L16 The V4L2_PIX_FMT_HM12 format is actually a simple NV12 tiled format, with 16x16 linear tiles. Rename the format and move its documentation together with the other tiled NV12 formats. Keep V4L2_PIX_FMT_HM12 for application compatibility. Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 0188cd39468f..40fec00ce73a 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -601,7 +601,6 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV61 v4l2_fourcc('N', 'V', '6', '1') /* 16 Y/CrCb 4:2:2 */ #define V4L2_PIX_FMT_NV24 v4l2_fourcc('N', 'V', '2', '4') /* 24 Y/CbCr 4:4:4 */ #define V4L2_PIX_FMT_NV42 v4l2_fourcc('N', 'V', '4', '2') /* 24 Y/CrCb 4:4:4 */ -#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ /* two non contiguous planes - one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12M v4l2_fourcc('N', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 */ @@ -628,6 +627,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24 YVU444 planar */ /* Tiled YUV formats */ +#define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ @@ -2619,6 +2619,7 @@ struct v4l2_create_buffers { /* Deprecated definitions kept for backwards compatibility */ #ifndef __KERNEL__ +#define V4L2_PIX_FMT_HM12 V4L2_PIX_FMT_NV12_16L16 #define V4L2_PIX_FMT_SUNXI_TILED_NV12 V4L2_PIX_FMT_NV12_32L32 #endif -- cgit v1.2.3 From 683f71ebb35d9223b4a22488e2eaffac30af104d Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 5 Aug 2021 04:47:51 +0200 Subject: media: Add NV12_4L4 tiled format This format is produced by VeriSilicon Hantro G2 and VC8000D cores. It is a simple 4x4 tiling layout in a linear way. The pixel format was introduced by GStreamer using FourCC VT12, so let's stick to it. Link: https://gstreamer.freedesktop.org/documentation/video/video-format.html Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 40fec00ce73a..56003a5467fc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -627,6 +627,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24 YVU444 planar */ /* Tiled YUV formats */ +#define V4L2_PIX_FMT_NV12_4L4 v4l2_fourcc('V', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 4x4 tiles */ #define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ -- cgit v1.2.3 From 75b8f8f2646ccaf085a87983329be8e47bd8b6bc Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 5 Aug 2021 04:47:52 +0200 Subject: media: Clean V4L2_PIX_FMT_NV12MT documentation Add more information about V4L2_PIX_FMT_NV12MT and V4L2_PIX_FMT_NV12M_16X16, so it's clearer for driver authors and users. Also, group the two pixel formats with the other tiled formats, for clarity. Unlike the recently introduced tiled formats (V4L2_PIX_FMT_NV12_4L4, etc) these formats have remained Samsung-specific until now. Therefore, and although the NV12MT and NV12MT_16X16 nomenclatures are less clear, we are keeping them as-is. Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 56003a5467fc..58392dcd3bf5 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -607,8 +607,6 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV21M v4l2_fourcc('N', 'M', '2', '1') /* 21 Y/CrCb 4:2:0 */ #define V4L2_PIX_FMT_NV16M v4l2_fourcc('N', 'M', '1', '6') /* 16 Y/CbCr 4:2:2 */ #define V4L2_PIX_FMT_NV61M v4l2_fourcc('N', 'M', '6', '1') /* 16 Y/CrCb 4:2:2 */ -#define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 macroblocks */ -#define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 macroblocks */ /* three planes - Y Cb, Cr */ #define V4L2_PIX_FMT_YUV410 v4l2_fourcc('Y', 'U', 'V', '9') /* 9 YUV 4:1:0 */ @@ -631,6 +629,10 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ +/* Tiled YUV formats, non contiguous planes */ +#define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ +#define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ + /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ #define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G', 'B', 'R', 'G') /* 8 GBGB.. RGRG.. */ -- cgit v1.2.3 From ffe5350c016a0de8ac77d32d9d4ea378cc9ff402 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Aug 2021 06:15:25 +0200 Subject: media: add Mediatek's MM21 format Add Mediatek's non-compressed 8 bit block video mode. This format is produced by the MT8183 codec and can be converted to a non-proprietary format by the MDP3 component. Signed-off-by: Alexandre Courbot Signed-off-by: Tzung-Bi Shih Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 58392dcd3bf5..5cc9545feb40 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -739,6 +739,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ +#define V4L2_PIX_FMT_MM21 v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */ #define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ #define V4L2_PIX_FMT_CNF4 v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */ -- cgit v1.2.3 From a9c80593ff80ddb7c6496624e5384e1ea3460a72 Mon Sep 17 00:00:00 2001 From: David Plowman Date: Mon, 16 Aug 2021 13:39:08 +0200 Subject: media: v4l2-ctrls: Add V4L2_CID_NOTIFY_GAINS control We add a new control V4L2_CID_NOTIFY_GAINS which allows the sensor to be notified what gains will be applied to the different colour channels by subsequent processing (such as by an ISP), even though the sensor will not apply any of these gains itself. For Bayer sensors this will be an array control taking 4 values which are the 4 gains arranged in the fixed order B, Gb, Gr and R, irrespective of the exact Bayer order of the sensor itself. The use of an array makes it straightforward to extend this control to non-Bayer sensors (for example, sensors with an RGBW pattern) in future. The units are in all cases linear with the default value indicating a gain of exactly 1.0. For example, if the default value were reported as 128 then the value 192 would represent a gain of exactly 1.5. Signed-off-by: David Plowman Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 5532b5f68493..133e20444939 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1118,6 +1118,7 @@ enum v4l2_jpeg_chroma_subsampling { #define V4L2_CID_TEST_PATTERN_BLUE (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 6) #define V4L2_CID_TEST_PATTERN_GREENB (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 7) #define V4L2_CID_UNIT_CELL_SIZE (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 8) +#define V4L2_CID_NOTIFY_GAINS (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 9) /* Image processing controls */ -- cgit v1.2.3 From 965c1e0bfeb66888fb000540c1fc4e8b55533d9c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 9 Sep 2021 13:24:27 +0200 Subject: media: videobuf2: add V4L2_MEMORY_FLAG_NON_COHERENT flag By setting or clearing the V4L2_MEMORY_FLAG_NON_COHERENT flag user-space should be able to hint vb2 that either non-coherent (if supported) or coherent memory should be used for the buffer allocation. Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 5cc9545feb40..9b7032abb2c7 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -962,6 +962,8 @@ struct v4l2_requestbuffers { __u32 reserved[1]; }; +#define V4L2_MEMORY_FLAG_NON_COHERENT (1 << 0) + /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */ #define V4L2_BUF_CAP_SUPPORTS_MMAP (1 << 0) #define V4L2_BUF_CAP_SUPPORTS_USERPTR (1 << 1) -- cgit v1.2.3 From c0acf9cfeee061f041fab778dbdcb34b6ca5e2e7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 9 Sep 2021 13:24:29 +0200 Subject: media: videobuf2: handle V4L2_MEMORY_FLAG_NON_COHERENT flag This patch lets user-space request a non-coherent memory allocation during CREATE_BUFS and REQBUFS ioctl calls. = CREATE_BUFS struct v4l2_create_buffers has seven 4-byte reserved areas, so reserved[0] is renamed to ->flags. The struct, thus, now has six reserved 4-byte regions. = CREATE_BUFS32 struct v4l2_create_buffers32 has seven 4-byte reserved areas, so reserved[0] is renamed to ->flags. The struct, thus, now has six reserved 4-byte regions. = REQBUFS We use one byte of a 4 byte ->reserved[1] member of struct v4l2_requestbuffers. The struct, thus, now has reserved 3 bytes. Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9b7032abb2c7..f118fe7a9f58 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -959,7 +959,8 @@ struct v4l2_requestbuffers { __u32 type; /* enum v4l2_buf_type */ __u32 memory; /* enum v4l2_memory */ __u32 capabilities; - __u32 reserved[1]; + __u8 flags; + __u8 reserved[3]; }; #define V4L2_MEMORY_FLAG_NON_COHERENT (1 << 0) @@ -2507,6 +2508,9 @@ struct v4l2_dbg_chip_info { * @memory: enum v4l2_memory; buffer memory type * @format: frame format, for which buffers are requested * @capabilities: capabilities of this buffer type. + * @flags: additional buffer management attributes (ignored unless the + * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability + * and configured for MMAP streaming I/O). * @reserved: future extensions */ struct v4l2_create_buffers { @@ -2515,7 +2519,8 @@ struct v4l2_create_buffers { __u32 memory; struct v4l2_format format; __u32 capabilities; - __u32 reserved[7]; + __u32 flags; + __u32 reserved[6]; }; /* -- cgit v1.2.3 From a70e3f024d5f4ec7edb17ab5d927eb55397f1d15 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 30 Sep 2021 14:21:04 -0700 Subject: devlink: report maximum number of snapshots with regions Each region has an independently configurable number of maximum snapshots. This information is not reported to userspace, making it not very discoverable. Fix this by adding a new DEVLINK_ATTR_REGION_MAX_SNAPSHOST attribute which is used to report this maximum. Ex: $devlink region pci/0000:af:00.0/nvm-flash: size 10485760 snapshot [] max 1 pci/0000:af:00.0/device-caps: size 4096 snapshot [] max 10 pci/0000:af:00.1/nvm-flash: size 10485760 snapshot [] max 1 pci/0000:af:00.1/device-caps: size 4096 snapshot [] max 10 This information enables users to understand why a new region command may fail due to having too many existing snapshots. Reported-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Jacob Keller Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 32f53a0069d6..b897b80770f6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -551,6 +551,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_NODE_NAME, /* string */ DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ + DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From ef5825e3cf0d0af657f5fb4dd86d750ed42fee0a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 2 Sep 2021 11:14:47 +1000 Subject: NFSD: move filehandle format declarations out of "uapi". A small part of the declaration concerning filehandle format are currently in the "uapi" include directory: include/uapi/linux/nfsd/nfsfh.h There is a lot more to the filehandle format, including "enum fid_type" and "enum nfsd_fsid" which are not exported via "uapi". This small part of the filehandle definition is of minimal use outside of the kernel, and I can find no evidence that an other code is using it. Certainly nfs-utils and wireshark (The most likely candidates) do not use these declarations. So move it out of "uapi" by copying the content from include/uapi/linux/nfsd/nfsfh.h into fs/nfsd/nfsfh.h A few unnecessary "#include" directives are not copied, and neither is the #define of fh_auth, which is annotated as being for userspace only. The copyright claims in the uapi file are identical to those in the nfsd file, so there is no need to copy those. The "__u32" style integer types are only needed in "uapi". In kernel-only code we can use the more familiar "u32" style. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/uapi/linux/nfsd/nfsfh.h | 115 ---------------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 include/uapi/linux/nfsd/nfsfh.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h deleted file mode 100644 index e29e8accc4f4..000000000000 --- a/include/uapi/linux/nfsd/nfsfh.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * This file describes the layout of the file handles as passed - * over the wire. - * - * Copyright (C) 1995, 1996, 1997 Olaf Kirch - */ - -#ifndef _UAPI_LINUX_NFSD_FH_H -#define _UAPI_LINUX_NFSD_FH_H - -#include -#include -#include -#include -#include - -/* - * This is the old "dentry style" Linux NFSv2 file handle. - * - * The xino and xdev fields are currently used to transport the - * ino/dev of the exported inode. - */ -struct nfs_fhbase_old { - __u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */ - __u32 fb_ino; /* our inode number */ - __u32 fb_dirino; /* dir inode number, 0 for directories */ - __u32 fb_dev; /* our device */ - __u32 fb_xdev; - __u32 fb_xino; - __u32 fb_generation; -}; - -/* - * This is the new flexible, extensible style NFSv2/v3/v4 file handle. - * - * The file handle starts with a sequence of four-byte words. - * The first word contains a version number (1) and three descriptor bytes - * that tell how the remaining 3 variable length fields should be handled. - * These three bytes are auth_type, fsid_type and fileid_type. - * - * All four-byte values are in host-byte-order. - * - * The auth_type field is deprecated and must be set to 0. - * - * The fsid_type identifies how the filesystem (or export point) is - * encoded. - * Current values: - * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number - * NOTE: we cannot use the kdev_t device id value, because kdev_t.h - * says we mustn't. We must break it up and reassemble. - * 1 - 4 byte user specified identifier - * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED - * 3 - 4 byte device id, encoded for user-space, 4 byte inode number - * 4 - 4 byte inode number and 4 byte uuid - * 5 - 8 byte uuid - * 6 - 16 byte uuid - * 7 - 8 byte inode number and 16 byte uuid - * - * The fileid_type identified how the file within the filesystem is encoded. - * The values for this field are filesystem specific, exccept that - * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' - * in include/linux/exportfs.h for currently registered values. - */ -struct nfs_fhbase_new { - union { - struct { - __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type_aux; - __u8 fb_fsid_type_aux; - __u8 fb_fileid_type_aux; - __u32 fb_auth[1]; - /* __u32 fb_fsid[0]; floating */ - /* __u32 fb_fileid[0]; floating */ - }; - struct { - __u8 fb_version; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type; - __u8 fb_fsid_type; - __u8 fb_fileid_type; - __u32 fb_auth_flex[]; /* flexible-array member */ - }; - }; -}; - -struct knfsd_fh { - unsigned int fh_size; /* significant for NFSv3. - * Points to the current size while building - * a new file handle - */ - union { - struct nfs_fhbase_old fh_old; - __u32 fh_pad[NFS4_FHSIZE/4]; - struct nfs_fhbase_new fh_new; - } fh_base; -}; - -#define ofh_dcookie fh_base.fh_old.fb_dcookie -#define ofh_ino fh_base.fh_old.fb_ino -#define ofh_dirino fh_base.fh_old.fb_dirino -#define ofh_dev fh_base.fh_old.fb_dev -#define ofh_xdev fh_base.fh_old.fb_xdev -#define ofh_xino fh_base.fh_old.fb_xino -#define ofh_generation fh_base.fh_old.fb_generation - -#define fh_version fh_base.fh_new.fb_version -#define fh_fsid_type fh_base.fh_new.fb_fsid_type -#define fh_auth_type fh_base.fh_new.fb_auth_type -#define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_fsid fh_base.fh_new.fb_auth_flex - -/* Do not use, provided for userspace compatiblity. */ -#define fh_auth fh_base.fh_new.fb_auth - -#endif /* _UAPI_LINUX_NFSD_FH_H */ -- cgit v1.2.3 From dea8ee31a039277576c215fffa13957970246366 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 27 Sep 2021 17:10:14 +0530 Subject: RISC-V: KVM: Add SBI v0.1 support The KVM host kernel is running in HS-mode needs so we need to handle the SBI calls coming from guest kernel running in VS-mode. This patch adds SBI v0.1 support in KVM RISC-V. Almost all SBI v0.1 calls are implemented in KVM kernel module except GETCHAR and PUTCHART calls which are forwarded to user space because these calls cannot be implemented in kernel space. In future, when we implement SBI v0.2 for Guest, we will forward SBI v0.2 experimental and vendor extension calls to user space. Signed-off-by: Atish Patra Signed-off-by: Anup Patel Acked-by: Paolo Bonzini Reviewed-by: Paolo Bonzini Acked-by: Palmer Dabbelt --- include/uapi/linux/kvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a067410ebea5..322b4b588d75 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -269,6 +269,7 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 +#define KVM_EXIT_RISCV_SBI 35 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -469,6 +470,13 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; + /* KVM_EXIT_RISCV_SBI */ + struct { + unsigned long extension_id; + unsigned long function_id; + unsigned long args[6]; + unsigned long ret[2]; + } riscv_sbi; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From 8cb3bf8bff3c47e171f6b66f9ccfc3f1451a11a2 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sun, 3 Oct 2021 20:45:38 +0200 Subject: ipv6: ioam: Add support for the ip6ip6 encapsulation This patch adds support for the ip6ip6 encapsulation by providing three encap modes: inline, encap and auto. Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6_iptunnel.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index bae14636a8c8..829ffdfcacca 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -9,9 +9,38 @@ #ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H #define _UAPI_LINUX_IOAM6_IPTUNNEL_H +/* Encap modes: + * - inline: direct insertion + * - encap: ip6ip6 encapsulation + * - auto: inline for local packets, encap for in-transit packets + */ +enum { + __IOAM6_IPTUNNEL_MODE_MIN, + + IOAM6_IPTUNNEL_MODE_INLINE, + IOAM6_IPTUNNEL_MODE_ENCAP, + IOAM6_IPTUNNEL_MODE_AUTO, + + __IOAM6_IPTUNNEL_MODE_MAX, +}; + +#define IOAM6_IPTUNNEL_MODE_MIN (__IOAM6_IPTUNNEL_MODE_MIN + 1) +#define IOAM6_IPTUNNEL_MODE_MAX (__IOAM6_IPTUNNEL_MODE_MAX - 1) + enum { IOAM6_IPTUNNEL_UNSPEC, + + /* Encap mode */ + IOAM6_IPTUNNEL_MODE, /* u8 */ + + /* Tunnel dst address. + * For encap,auto modes. + */ + IOAM6_IPTUNNEL_DST, /* struct in6_addr */ + + /* IOAM Trace Header */ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, }; -- cgit v1.2.3 From 571e5c0efcb29c5dac8cf2949d3eed84ec43056c Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 19 May 2021 16:00:22 -0400 Subject: audit: add OPENAT2 record to list "how" info Since the openat2(2) syscall uses a struct open_how pointer to communicate its parameters they are not usefully recorded by the audit SYSCALL record's four existing arguments. Add a new audit record type OPENAT2 that reports the parameters in its third argument, struct open_how with fields oflag, mode and resolve. The new record in the context of an event would look like: time->Wed Mar 17 16:28:53 2021 type=PROCTITLE msg=audit(1616012933.531:184): proctitle= 73797363616C6C735F66696C652F6F70656E617432002F746D702F61756469742D 7465737473756974652D737641440066696C652D6F70656E617432 type=PATH msg=audit(1616012933.531:184): item=1 name="file-openat2" inode=29 dev=00:1f mode=0100600 ouid=0 ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE cap_fp=0 cap_fi=0 cap_fe=0 cap_fver=0 cap_frootid=0 type=PATH msg=audit(1616012933.531:184): item=0 name="/root/rgb/git/audit-testsuite/tests" inode=25 dev=00:1f mode=040700 ouid=0 ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=PARENT cap_fp=0 cap_fi=0 cap_fe=0 cap_fver=0 cap_frootid=0 type=CWD msg=audit(1616012933.531:184): cwd="/root/rgb/git/audit-testsuite/tests" type=OPENAT2 msg=audit(1616012933.531:184): oflag=0100302 mode=0600 resolve=0xa type=SYSCALL msg=audit(1616012933.531:184): arch=c000003e syscall=437 success=yes exit=4 a0=3 a1=7ffe315f1c53 a2=7ffe315f1550 a3=18 items=2 ppid=528 pid=540 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 ses=1 comm="openat2" exe="/root/rgb/git/audit-testsuite/tests/syscalls_file/openat2" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="testsuite-1616012933-bjAUcEPO" Link: https://lore.kernel.org/r/d23fbb89186754487850367224b060e26f9b7181.1621363275.git.rgb@redhat.com Signed-off-by: Richard Guy Briggs Acked-by: Christian Brauner [PM: tweak subject, wrap example, move AUDIT_OPENAT2 to 1337] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index daa481729e9b..afa2472ad5d6 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -118,6 +118,7 @@ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ +#define AUDIT_OPENAT2 1337 /* Record showing openat2 how args */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.3 From d154abdda6dcac92c63141035e477ac18077ffd8 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 24 Sep 2021 07:13:54 -0500 Subject: ipmi: Fix a typo Spell "RESPONSE" correctly in a comment. Signed-off-by: Corey Minyard --- include/uapi/linux/ipmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 32d148309b16..007e65f9243b 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -158,7 +158,7 @@ struct kernel_ipmi_msg { * is used for the receive in-kernel interface and in the receive * IOCTL. * - * The "IPMI_RESPONSE_RESPNOSE_TYPE" is a little strange sounding, but + * The "IPMI_RESPONSE_RESPONSE_TYPE" is a little strange sounding, but * it allows you to get the message results when you send a response * message. */ -- cgit v1.2.3 From 059747c245f0e9af5e109eece7d3414dbe08d513 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 24 Sep 2021 11:42:56 -0500 Subject: ipmi: Add support for IPMB direct messages An application has come up that has a device sitting right on the IPMB that would like to communicate with the BMC on the IPMB using normal IPMI commands. Sending these commands and handling the responses is easy enough, no modifications are needed to the IPMI infrastructure. But if this is an application that also needs to receive IPMB commands and respond, some way is needed to handle these incoming commands and send the responses. Currently, the IPMI message handler only sends commands to the interface and only receives responses from interface. This change extends the interface to receive commands/responses and send commands/responses. These are formatted differently in support of receiving/sending IPMB messages directly. Signed-off-by: Corey Minyard Tested-by: Andrew Manley Reviewed-by: Andrew Manley --- include/uapi/linux/ipmi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 007e65f9243b..966c3070959b 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -80,6 +80,20 @@ struct ipmi_ipmb_addr { unsigned char lun; }; +/* + * Used for messages received directly from an IPMB that have not gone + * through a MC. This is for systems that sit right on an IPMB so + * they can receive commands and respond to them. + */ +#define IPMI_IPMB_DIRECT_ADDR_TYPE 0x81 +struct ipmi_ipmb_direct_addr { + int addr_type; + short channel; + unsigned char slave_addr; + unsigned char rs_lun; + unsigned char rq_lun; +}; + /* * A LAN Address. This is an address to/from a LAN interface bridged * by the BMC, not an address actually out on the LAN. -- cgit v1.2.3 From 29a9f27574692a71c04fd41ca4bbf8eae842af13 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 23 Sep 2021 16:41:27 +0800 Subject: virt: acrn: Introduce interfaces for MMIO device passthrough MMIO device passthrough enables an OS in a virtual machine to directly access a MMIO device in the host. It promises almost the native performance, which is required in performance-critical scenarios of ACRN. HSM provides the following ioctls: - Assign - ACRN_IOCTL_ASSIGN_MMIODEV Pass data struct acrn_mmiodev from userspace to the hypervisor, and inform the hypervisor to assign a MMIO device to a User VM. - De-assign - ACRN_IOCTL_DEASSIGN_PCIDEV Pass data struct acrn_mmiodev from userspace to the hypervisor, and inform the hypervisor to de-assign a MMIO device from a User VM. These new APIs will be used by user space code vm_assign_mmiodev and vm_deassign_mmiodev in https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/core/vmmapi.c Signed-off-by: Shuo Liu Signed-off-by: Fei Li Link: https://lore.kernel.org/r/20210923084128.18902-2-fei1.li@intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/acrn.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h index 353b2a2e4536..470036d6b1ac 100644 --- a/include/uapi/linux/acrn.h +++ b/include/uapi/linux/acrn.h @@ -396,6 +396,7 @@ struct acrn_ptdev_irq { /* Type of PCI device assignment */ #define ACRN_PTDEV_QUIRK_ASSIGN (1U << 0) +#define ACRN_MMIODEV_RES_NUM 3 #define ACRN_PCI_NUM_BARS 6 /** * struct acrn_pcidev - Info for assigning or de-assigning a PCI device @@ -417,6 +418,29 @@ struct acrn_pcidev { __u32 bar[ACRN_PCI_NUM_BARS]; }; +/** + * struct acrn_mmiodev - Info for assigning or de-assigning a MMIO device + * @name: Name of the MMIO device. + * @res[].user_vm_pa: Physical address of User VM of the MMIO region + * for the MMIO device. + * @res[].service_vm_pa: Physical address of Service VM of the MMIO + * region for the MMIO device. + * @res[].size: Size of the MMIO region for the MMIO device. + * @res[].mem_type: Memory type of the MMIO region for the MMIO + * device. + * + * This structure will be passed to hypervisor directly. + */ +struct acrn_mmiodev { + __u8 name[8]; + struct { + __u64 user_vm_pa; + __u64 service_vm_pa; + __u64 size; + __u64 mem_type; + } res[ACRN_MMIODEV_RES_NUM]; +}; + /** * struct acrn_msi_entry - Info for injecting a MSI interrupt to a VM * @msi_addr: MSI addr[19:12] with dest vCPU ID @@ -568,6 +592,10 @@ struct acrn_irqfd { _IOW(ACRN_IOCTL_TYPE, 0x55, struct acrn_pcidev) #define ACRN_IOCTL_DEASSIGN_PCIDEV \ _IOW(ACRN_IOCTL_TYPE, 0x56, struct acrn_pcidev) +#define ACRN_IOCTL_ASSIGN_MMIODEV \ + _IOW(ACRN_IOCTL_TYPE, 0x57, struct acrn_mmiodev) +#define ACRN_IOCTL_DEASSIGN_MMIODEV \ + _IOW(ACRN_IOCTL_TYPE, 0x58, struct acrn_mmiodev) #define ACRN_IOCTL_PM_GET_CPU_STATE \ _IOWR(ACRN_IOCTL_TYPE, 0x60, __u64) -- cgit v1.2.3 From 424f1ac2d832f31a2814c799bd50decf6a9f8e74 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 23 Sep 2021 16:41:28 +0800 Subject: virt: acrn: Introduce interfaces for virtual device creating/destroying The ACRN hypervisor can emulate a virtual device within hypervisor for a Guest VM. The emulated virtual device can work without the ACRN userspace after creation. The hypervisor do the emulation of that device. To support the virtual device creating/destroying, HSM provides the following ioctls: - ACRN_IOCTL_CREATE_VDEV Pass data struct acrn_vdev from userspace to the hypervisor, and inform the hypervisor to create a virtual device for a User VM. - ACRN_IOCTL_DESTROY_VDEV Pass data struct acrn_vdev from userspace to the hypervisor, and inform the hypervisor to destroy a virtual device of a User VM. These new APIs will be used by user space code vm_add_hv_vdev and vm_remove_hv_vdev in https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/core/vmmapi.c Signed-off-by: Shuo Liu Signed-off-by: Fei Li Link: https://lore.kernel.org/r/20210923084128.18902-3-fei1.li@intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/acrn.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h index 470036d6b1ac..ccf47ed92500 100644 --- a/include/uapi/linux/acrn.h +++ b/include/uapi/linux/acrn.h @@ -441,6 +441,44 @@ struct acrn_mmiodev { } res[ACRN_MMIODEV_RES_NUM]; }; +/** + * struct acrn_vdev - Info for creating or destroying a virtual device + * @id: Union of identifier of the virtual device + * @id.value: Raw data of the identifier + * @id.fields.vendor: Vendor id of the virtual PCI device + * @id.fields.device: Device id of the virtual PCI device + * @id.fields.legacy_id: ID of the virtual device if not a PCI device + * @slot: Virtual Bus/Device/Function of the virtual + * device + * @io_base: IO resource base address of the virtual device + * @io_size: IO resource size of the virtual device + * @args: Arguments for the virtual device creation + * + * The created virtual device can be a PCI device or a legacy device (e.g. + * a virtual UART controller) and it is emulated by the hypervisor. This + * structure will be passed to hypervisor directly. + */ +struct acrn_vdev { + /* + * the identifier of the device, the low 32 bits represent the vendor + * id and device id of PCI device and the high 32 bits represent the + * device number of the legacy device + */ + union { + __u64 value; + struct { + __le16 vendor; + __le16 device; + __le32 legacy_id; + } fields; + } id; + + __u64 slot; + __u32 io_addr[ACRN_PCI_NUM_BARS]; + __u32 io_size[ACRN_PCI_NUM_BARS]; + __u8 args[128]; +}; + /** * struct acrn_msi_entry - Info for injecting a MSI interrupt to a VM * @msi_addr: MSI addr[19:12] with dest vCPU ID @@ -596,6 +634,10 @@ struct acrn_irqfd { _IOW(ACRN_IOCTL_TYPE, 0x57, struct acrn_mmiodev) #define ACRN_IOCTL_DEASSIGN_MMIODEV \ _IOW(ACRN_IOCTL_TYPE, 0x58, struct acrn_mmiodev) +#define ACRN_IOCTL_CREATE_VDEV \ + _IOW(ACRN_IOCTL_TYPE, 0x59, struct acrn_vdev) +#define ACRN_IOCTL_DESTROY_VDEV \ + _IOW(ACRN_IOCTL_TYPE, 0x5A, struct acrn_vdev) #define ACRN_IOCTL_PM_GET_CPU_STATE \ _IOWR(ACRN_IOCTL_TYPE, 0x60, __u64) -- cgit v1.2.3 From 95a13ee858c9e426e63c97063677736f74af7163 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Oct 2021 16:55:44 +0300 Subject: hyper-v: Replace uuid.h with types.h There is no user of anything in uuid.h in the hyperv.h. Replace it with more appropriate types.h. Fixes: f081bbb3fd03 ("hyper-v: Remove internal types from UAPI header") Reported-by: Greg Kroah-Hartman Signed-off-by: Andy Shevchenko Reviewed-by: Haiyang Zhang Link: https://lore.kernel.org/r/20211001135544.1823-1-andriy.shevchenko@linux.intel.com Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 6135d92e0d47..daf82a230c0e 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -26,7 +26,7 @@ #ifndef _UAPI_HYPERV_H #define _UAPI_HYPERV_H -#include +#include /* * Framework version for util services. -- cgit v1.2.3 From 353407d917b2d87cd8104a0453d012439c6ca4be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Oct 2021 13:46:42 +0300 Subject: ethtool: Add ability to control transceiver modules' power mode Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and 'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver modules parameters and retrieve their status. The first parameter to control is the power mode of the module. It is only relevant for paged memory modules, as flat memory modules always operate in low power mode. When a paged memory module is in low power mode, its power consumption is reduced to the minimum, the management interface towards the host is available and the data path is deactivated. User space can choose to put modules that are not currently in use in low power mode and transition them to high power mode before putting the associated ports administratively up. This is useful for user space that favors reduced power consumption and lower temperatures over reduced link up times. In QSFP-DD modules the transition from low power mode to high power mode can take a few seconds and this transition is only expected to get longer with future / more complex modules. User space can control the power mode of the module via the power mode policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible values: * high: Module is always in high power mode. * auto: Module is transitioned by the host to high power mode when the first port using it is put administratively up and to low power mode when the last port using it is put administratively down. The operational power mode of the module is available to user space via the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not reported to user space when a module is not plugged-in. The user API is designed to be generic enough so that it could be used for modules with different memory maps (e.g., SFF-8636, CMIS). The only implementation of the device driver API in this series is for a MAC driver (mlxsw) where the module is controlled by the device's firmware, but it is designed to be generic enough so that it could also be used by implementations where the module is controlled by the CPU. CMIS testing ============ # ethtool -m swp11 Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628)) ... Module State : 0x03 (ModuleReady) LowPwrAllowRequestHW : Off LowPwrRequestSW : Off The module is not in low power mode, as it is not forced by hardware (LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off). The power mode can be queried from the kernel. In case LowPwrAllowRequestHW was on, the kernel would need to take into account the state of the LowPwrRequestHW signal, which is not visible to user space. $ ethtool --show-module swp11 Module parameters for swp11: power-mode-policy high power-mode high Change the power mode policy to 'auto': # ethtool --set-module swp11 power-mode-policy auto Query the power mode again: $ ethtool --show-module swp11 Module parameters for swp11: power-mode-policy auto power-mode low Verify with the data read from the EEPROM: # ethtool -m swp11 Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628)) ... Module State : 0x01 (ModuleLowPwr) LowPwrAllowRequestHW : Off LowPwrRequestSW : On Put the associated port administratively up which will instruct the host to transition the module to high power mode: # ip link set dev swp11 up Query the power mode again: $ ethtool --show-module swp11 Module parameters for swp11: power-mode-policy auto power-mode high Verify with the data read from the EEPROM: # ethtool -m swp11 Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628)) ... Module State : 0x03 (ModuleReady) LowPwrAllowRequestHW : Off LowPwrRequestSW : Off Put the associated port administratively down which will instruct the host to transition the module to low power mode: # ip link set dev swp11 down Query the power mode again: $ ethtool --show-module swp11 Module parameters for swp11: power-mode-policy auto power-mode low Verify with the data read from the EEPROM: # ethtool -m swp11 Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628)) ... Module State : 0x01 (ModuleLowPwr) LowPwrAllowRequestHW : Off LowPwrRequestSW : On SFF-8636 testing ================ # ethtool -m swp13 Identifier : 0x11 (QSFP28) ... Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled Power set : Off Power override : On ... Transmit avg optical power (Channel 1) : 0.7733 mW / -1.12 dBm Transmit avg optical power (Channel 2) : 0.7649 mW / -1.16 dBm Transmit avg optical power (Channel 3) : 0.7790 mW / -1.08 dBm Transmit avg optical power (Channel 4) : 0.7837 mW / -1.06 dBm Rcvr signal avg optical power(Channel 1) : 0.9302 mW / -0.31 dBm Rcvr signal avg optical power(Channel 2) : 0.9079 mW / -0.42 dBm Rcvr signal avg optical power(Channel 3) : 0.8993 mW / -0.46 dBm Rcvr signal avg optical power(Channel 4) : 0.8778 mW / -0.57 dBm The module is not in low power mode, as it is not forced by hardware (Power override is on) or by software (Power set is off). The power mode can be queried from the kernel. In case Power override was off, the kernel would need to take into account the state of the LPMode signal, which is not visible to user space. $ ethtool --show-module swp13 Module parameters for swp13: power-mode-policy high power-mode high Change the power mode policy to 'auto': # ethtool --set-module swp13 power-mode-policy auto Query the power mode again: $ ethtool --show-module swp13 Module parameters for swp13: power-mode-policy auto power-mode low Verify with the data read from the EEPROM: # ethtool -m swp13 Identifier : 0x11 (QSFP28) Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled Power set : On Power override : On ... Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm Put the associated port administratively up which will instruct the host to transition the module to high power mode: # ip link set dev swp13 up Query the power mode again: $ ethtool --show-module swp13 Module parameters for swp13: power-mode-policy auto power-mode high Verify with the data read from the EEPROM: # ethtool -m swp13 Identifier : 0x11 (QSFP28) ... Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled Power set : Off Power override : On ... Transmit avg optical power (Channel 1) : 0.7934 mW / -1.01 dBm Transmit avg optical power (Channel 2) : 0.7859 mW / -1.05 dBm Transmit avg optical power (Channel 3) : 0.7885 mW / -1.03 dBm Transmit avg optical power (Channel 4) : 0.7985 mW / -0.98 dBm Rcvr signal avg optical power(Channel 1) : 0.9325 mW / -0.30 dBm Rcvr signal avg optical power(Channel 2) : 0.9034 mW / -0.44 dBm Rcvr signal avg optical power(Channel 3) : 0.9086 mW / -0.42 dBm Rcvr signal avg optical power(Channel 4) : 0.8885 mW / -0.51 dBm Put the associated port administratively down which will instruct the host to transition the module to low power mode: # ip link set dev swp13 down Query the power mode again: $ ethtool --show-module swp13 Module parameters for swp13: power-mode-policy auto power-mode low Verify with the data read from the EEPROM: # ethtool -m swp13 Identifier : 0x11 (QSFP28) ... Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled Power set : On Power override : On ... Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 23 +++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 17 +++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b6db6590baf0..6de61d53ca5d 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -706,6 +706,29 @@ enum ethtool_stringset { ETH_SS_COUNT }; +/** + * enum ethtool_module_power_mode_policy - plug-in module power mode policy + * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode. + * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host + * to high power mode when the first port using it is put administratively + * up and to low power mode when the last port using it is put + * administratively down. + */ +enum ethtool_module_power_mode_policy { + ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1, + ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO, +}; + +/** + * enum ethtool_module_power_mode - plug-in module power mode + * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode. + * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode. + */ +enum ethtool_module_power_mode { + ETHTOOL_MODULE_POWER_MODE_LOW = 1, + ETHTOOL_MODULE_POWER_MODE_HIGH, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5545f1ca9237..ca5fbb59fa42 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -47,6 +47,8 @@ enum { ETHTOOL_MSG_MODULE_EEPROM_GET, ETHTOOL_MSG_STATS_GET, ETHTOOL_MSG_PHC_VCLOCKS_GET, + ETHTOOL_MSG_MODULE_GET, + ETHTOOL_MSG_MODULE_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -90,6 +92,8 @@ enum { ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, ETHTOOL_MSG_STATS_GET_REPLY, ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + ETHTOOL_MSG_MODULE_GET_REPLY, + ETHTOOL_MSG_MODULE_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -833,6 +837,19 @@ enum { ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1) }; +/* MODULE */ + +enum { + ETHTOOL_A_MODULE_UNSPEC, + ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ + ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_CNT, + ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 3dfb51126064b594470b9c0b278188fbc9194709 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Oct 2021 13:46:46 +0300 Subject: ethtool: Add transceiver module extended state Add an extended state and sub-state to describe link issues related to transceiver modules. The 'ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY' extended sub-state tells user space that port is unable to gain a carrier because the CMIS Module State Machine did not reach the ModuleReady (Fully Operational) state. For example, if the module is stuck at ModuleLowPwr or ModuleFault state. In case of the latter, user space can read the fault reason from the module's EEPROM and potentially reset it. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 6de61d53ca5d..a2223b685451 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -603,6 +603,7 @@ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, ETHTOOL_LINK_EXT_STATE_OVERHEAT, + ETHTOOL_LINK_EXT_STATE_MODULE, }; /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ @@ -649,6 +650,11 @@ enum ethtool_link_ext_substate_cable_issue { ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, }; +/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */ +enum ethtool_link_ext_substate_module { + ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1, +}; + #define ETH_GSTRING_LEN 32 /** -- cgit v1.2.3 From bf69bad38cf63d980e8a603f8d1bd1f85b5ed3d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 23 Sep 2021 14:11:05 -0300 Subject: futex: Implement sys_futex_waitv() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to wait on multiple futexes. This is the interface implemented by this syscall: futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct timespec *timeout, clockid_t clockid) struct futex_waitv { __u64 val; __u64 uaddr; __u32 flags; __u32 __reserved; }; Given an array of struct futex_waitv, wait on each uaddr. The thread wakes if a futex_wake() is performed at any uaddr. The syscall returns immediately if any waiter has *uaddr != val. *timeout is an optional absolute timeout value for the operation. This syscall supports only 64bit sized timeout structs. The flags argument of the syscall should be empty, but it can be used for future extensions. Flags for shared futexes, sizes, etc. should be used on the individual flags of each waiter. __reserved is used for explicit padding and should be 0, but it might be used for future extensions. If the userspace uses 32-bit pointers, it should make sure to explicitly cast it when assigning to waitv::uaddr. Returns the array index of one of the woken futexes. There’s no given information of how many were woken, or any particular attribute of it (if it’s the first woken, if it is of the smaller index...). Signed-off-by: André Almeida Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com --- include/uapi/linux/futex.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 235e5b2facaa..71a5df8d2689 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -43,6 +43,31 @@ #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) +/* + * Flags to specify the bit length of the futex word for futex2 syscalls. + * Currently, only 32 is supported. + */ +#define FUTEX_32 2 + +/* + * Max numbers of elements in a futex_waitv array + */ +#define FUTEX_WAITV_MAX 128 + +/** + * struct futex_waitv - A waiter for vectorized wait + * @val: Expected value at uaddr + * @uaddr: User address to wait on + * @flags: Flags for this waiter + * @__reserved: Reserved member to preserve data alignment. Should be 0. + */ +struct futex_waitv { + __u64 val; + __u64 uaddr; + __u32 flags; + __u32 __reserved; +}; + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. -- cgit v1.2.3 From 460275f124fb072dca218a6b43b6370eebbab20d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 5 Oct 2021 20:09:40 +0200 Subject: PCI: Add PCI_EXP_DEVCTL_PAYLOAD_* macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a macro PCI_EXP_DEVCTL_PAYLOAD_* for every possible Max Payload Size in linux/pci_regs.h, in the same style as PCI_EXP_DEVCTL_READRQ_*. Link: https://lore.kernel.org/r/20211005180952.6812-2-kabel@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Marek Behún Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marek Behún Reviewed-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index e709ae8235e7..ff6ccbc6efe9 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -504,6 +504,12 @@ #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ -- cgit v1.2.3 From 4c1e34c0dbffb17accdfe16ac97ab432df9024ff Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Fri, 8 Oct 2021 11:00:53 +0100 Subject: vsock: Enable y2038 safe timeval for timeout Reuse the timeval compat code from core/sock to handle 32-bit and 64-bit timeval structures. Also introduce a new socket option define to allow using y2038 safe timeval under 32-bit. The existing behavior of sock_set_timeout and vsock's timeout setter differ when the time value is out of bounds. vsocks current behavior is retained at the expense of not being able to share the full implementation. This allows the LTP test vsock01 to pass under 32-bit compat mode. Fixes: fe0c72f3db11 ("socket: move compat timeout handling into sock.c") Signed-off-by: Richard Palethorpe Cc: Richard Palethorpe Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/uapi/linux/vm_sockets.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index 46918a1852d7..c60ca33eac59 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -64,7 +64,7 @@ * timeout for a STREAM socket. */ -#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6 +#define SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD 6 /* Option name for using non-blocking send/receive. Use as the option name * for setsockopt(3) or getsockopt(3) to set or get the non-blocking @@ -81,6 +81,17 @@ #define SO_VM_SOCKETS_NONBLOCK_TXRX 7 +#define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8 + +#if !defined(__KERNEL__) +#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) +#define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD +#else +#define SO_VM_SOCKETS_CONNECT_TIMEOUT \ + (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD : SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW) +#endif +#endif + /* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of * sockaddr_vm and indicates the context ID of the current endpoint. */ -- cgit v1.2.3 From 2c611ad97a82b51221bb0920cc6cac0b1d4c0e52 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 11 Oct 2021 14:12:37 +0200 Subject: net, neigh: Extend neigh->flags to 32 bit to allow for extensions Currently, all bits in struct ndmsg's ndm_flags are used up with the most recent addition of 435f2e7cc0b7 ("net: bridge: add support for sticky fdb entries"). This makes it impossible to extend the neighboring subsystem with new NTF_* flags: struct ndmsg { __u8 ndm_family; __u8 ndm_pad1; __u16 ndm_pad2; __s32 ndm_ifindex; __u16 ndm_state; __u8 ndm_flags; __u8 ndm_type; }; There are ndm_pad{1,2} attributes which are not used. However, due to uncareful design, the kernel does not enforce them to be zero upon new neighbor entry addition, and given they've been around forever, it is not possible to reuse them today due to risk of breakage. One option to overcome this limitation is to add a new NDA_FLAGS_EXT attribute for extended flags. In struct neighbour, there is a 3 byte hole between protocol and ha_lock, which allows neigh->flags to be extended from 8 to 32 bits while still being on the same cacheline as before. This also allows for all future NTF_* flags being in neigh->flags rather than yet another flags field. Unknown flags in NDA_FLAGS_EXT will be rejected by the kernel. Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 00a60695fa53..a80cca141855 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -31,6 +31,7 @@ enum { NDA_PROTOCOL, /* Originator of entry */ NDA_NH_ID, NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, __NDA_MAX }; -- cgit v1.2.3 From 7482e3841d520a368426ac196720601687e2dc47 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Oct 2021 14:12:38 +0200 Subject: net, neigh: Add NTF_MANAGED flag for managed neighbor entries Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED flag. The flag then indicates to the kernel that the neighbor entry should be periodically probed for keeping the entry in NUD_REACHABLE state iff possible. The use case for this is targeting XDP or tc BPF load-balancers which use the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution for their backends. Given they cannot be resolved in fast-path, a control plane inserts the L3 (without L2) entries manually into the neighbor table and lets the kernel do the neighbor resolution either on the gateway or on the backend directly in case the latter resides in the same L2. This avoids to deal with L2 in the control plane and to rebuild what the kernel already does best anyway. NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor table which gets triggered by the system work queue to periodically call neigh_event_send() for performing the resolution. The implementation allows migration from/to NTF_MANAGED neighbor entries, so that already existing entries can be converted by the control plane if needed. Potentially, we could make the interval for periodically calling neigh_event_send() configurable; right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table"). In future, the latter could possibly reuse the NTF_MANAGED neighbors as well. Example: # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE [...] Signed-off-by: Daniel Borkmann Acked-by: Roopa Prabhu Link: https://linuxplumbersconf.org/event/11/contributions/953/ Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index a80cca141855..db05fb55055e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -41,14 +41,16 @@ enum { * Neighbor Cache Entry Flags */ -#define NTF_USE 0x01 -#define NTF_SELF 0x02 -#define NTF_MASTER 0x04 -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_EXT_LEARNED 0x10 -#define NTF_OFFLOADED 0x20 -#define NTF_STICKY 0x40 -#define NTF_ROUTER 0x80 +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) /* * Neighbor Cache Entry States. @@ -66,12 +68,22 @@ enum { #define NUD_PERMANENT 0x80 #define NUD_NONE 0x00 -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - * and make no address resolution or NUD. - * NUD_PERMANENT also cannot be deleted by garbage collectors. +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry * states don't make sense and thus are ignored. Such entries don't age and * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. */ struct nda_cacheinfo { -- cgit v1.2.3 From 42df6e1d221dddc0f2acf2be37e68d553ad65f96 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 8 Oct 2021 22:06:03 +0200 Subject: netfilter: Introduce egress hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support classifying packets with netfilter on egress to satisfy user requirements such as: * outbound security policies for containers (Laura) * filtering and mangling intra-node Direct Server Return (DSR) traffic on a load balancer (Laura) * filtering locally generated traffic coming in through AF_PACKET, such as local ARP traffic generated for clustering purposes or DHCP (Laura; the AF_PACKET plumbing is contained in a follow-up commit) * L2 filtering from ingress and egress for AVB (Audio Video Bridging) and gPTP with nftables (Pablo) * in the future: in-kernel NAT64/NAT46 (Pablo) The egress hook introduced herein complements the ingress hook added by commit e687ad60af09 ("netfilter: add netfilter ingress hook after handle_ing() under unique static key"). A patch for nftables to hook up egress rules from user space has been submitted separately, so users may immediately take advantage of the feature. Alternatively or in addition to netfilter, packets can be classified with traffic control (tc). On ingress, packets are classified first by tc, then by netfilter. On egress, the order is reversed for symmetry. Conceptually, tc and netfilter can be thought of as layers, with netfilter layered above tc. Traffic control is capable of redirecting packets to another interface (man 8 tc-mirred). E.g., an ingress packet may be redirected from the host namespace to a container via a veth connection: tc ingress (host) -> tc egress (veth host) -> tc ingress (veth container) In this case, netfilter egress classifying is not performed when leaving the host namespace! That's because the packet is still on the tc layer. If tc redirects the packet to a physical interface in the host namespace such that it leaves the system, the packet is never subjected to netfilter egress classifying. That is only logical since it hasn't passed through netfilter ingress classifying either. Packets can alternatively be redirected at the netfilter layer using nft fwd. Such a packet *is* subjected to netfilter egress classifying since it has reached the netfilter layer. Internally, the skb->nf_skip_egress flag controls whether netfilter is invoked on egress by __dev_queue_xmit(). Because __dev_queue_xmit() may be called recursively by tunnel drivers such as vxlan, the flag is reverted to false after sch_handle_egress(). This ensures that netfilter is applied both on the overlay and underlying network. Interaction between tc and netfilter is possible by setting and querying skb->mark. If netfilter egress classifying is not enabled on any interface, it is patched out of the data path by way of a static_key and doesn't make a performance difference that is discernible from noise: Before: 1537 1538 1538 1537 1538 1537 Mb/sec After: 1536 1534 1539 1539 1539 1540 Mb/sec Before + tc accept: 1418 1418 1418 1419 1419 1418 Mb/sec After + tc accept: 1419 1424 1418 1419 1422 1420 Mb/sec Before + tc drop: 1620 1619 1619 1619 1620 1620 Mb/sec After + tc drop: 1616 1624 1625 1624 1622 1619 Mb/sec When netfilter egress classifying is enabled on at least one interface, a minimal performance penalty is incurred for every egress packet, even if the interface it's transmitted over doesn't have any netfilter egress rules configured. That is caused by checking dev->nf_hooks_egress against NULL. Measurements were performed on a Core i7-3615QM. Commands to reproduce: ip link add dev foo type dummy ip link set dev foo up modprobe pktgen echo "add_device foo" > /proc/net/pktgen/kpktgend_3 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -i foo -n 400000000 -m "11:11:11:11:11:11" -d 1.1.1.1 Accept all traffic with tc: tc qdisc add dev foo clsact tc filter add dev foo egress bpf da bytecode '1,6 0 0 0,' Drop all traffic with tc: tc qdisc add dev foo clsact tc filter add dev foo egress bpf da bytecode '1,6 0 0 2,' Apply this patch when measuring packet drops to avoid errors in dmesg: https://lore.kernel.org/netdev/a73dda33-57f4-95d8-ea51-ed483abd6a7a@iogearbox.net/ Signed-off-by: Lukas Wunner Cc: Laura García Liébana Cc: John Fastabend Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Eric Dumazet Cc: Thomas Graf Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef9a44286e23..53411ccc69db 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,6 +51,7 @@ enum nf_inet_hooks { enum nf_dev_hooks { NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, NF_NETDEV_NUMHOOKS }; -- cgit v1.2.3 From 8b8ff8cc3b8155c18162e8b1f70e1230db176862 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 7 Sep 2021 19:39:01 +0300 Subject: perf/x86: Add new event for AUX output counter index PEBS-via-PT records contain a mask of applicable counters. To identify which event belongs to which counter, a side-band event is needed. Until now, there has been no side-band event, and consequently users were limited to using a single event. Add such a side-band event. Note the event is optimised to output only when the counter index changes for an event. That works only so long as all PEBS-via-PT events are scheduled together, which they are for a recording session because they are in a single group. Also no attribute bit is used to select the new event, so a new kernel is not compatible with older perf tools. The assumption being that PEBS-via-PT is sufficiently esoteric that users will not be troubled by this. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210907163903.11820-2-adrian.hunter@intel.com --- include/uapi/linux/perf_event.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index f92880a15645..c89535de1ec8 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1141,6 +1141,21 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, + /* + * Data written to the AUX area by hardware due to aux_output, may need + * to be matched to the event by an architecture-specific hardware ID. + * This records the hardware ID, but requires sample_id to provide the + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT + * records from multiple events. + * + * struct { + * struct perf_event_header header; + * u64 hw_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + PERF_RECORD_MAX, /* non-ABI */ }; -- cgit v1.2.3 From e72aeb9ee0e34c57dc90793d0bf82cab9624d64e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2021 10:59:18 -0700 Subject: fq_codel: implement L4S style ce_threshold_ect1 marking Add TCA_FQ_CODEL_CE_THRESHOLD_ECT1 boolean option to select Low Latency, Low Loss, Scalable Throughput (L4S) style marking, along with ce_threshold. If enabled, only packets with ECT(1) can be transformed to CE if their sojourn time is above the ce_threshold. Note that this new option does not change rules for codel law. In particular, if TCA_FQ_CODEL_ECN is left enabled (this is the default when fq_codel qdisc is created), ECT(0) packets can still get CE if codel law (as governed by limit/target) decides so. Section 4.3.b of current draft [1] states: b. A scheduler with per-flow queues such as FQ-CoDel or FQ-PIE can be used for L4S. For instance within each queue of an FQ-CoDel system, as well as a CoDel AQM, there is typically also ECN marking at an immediate (unsmoothed) shallow threshold to support use in data centres (see Sec.5.2.7 of [RFC8290]). This can be modified so that the shallow threshold is solely applied to ECT(1) packets. Then if there is a flow of non-ECN or ECT(0) packets in the per-flow-queue, the Classic AQM (e.g. CoDel) is applied; while if there is a flow of ECT(1) packets in the queue, the shallower (typically sub-millisecond) threshold is applied. Tested: tc qd replace dev eth1 root fq_codel ce_threshold_ect1 50usec netperf ... -t TCP_STREAM -- K dctcp tc -s -d qd sh dev eth1 qdisc fq_codel 8022: root refcnt 32 limit 10240p flows 1024 quantum 9212 target 5ms ce_threshold_ect1 49us interval 100ms memory_limit 32Mb ecn drop_batch 64 Sent 14388596616 bytes 9543449 pkt (dropped 0, overlimits 0 requeues 152013) backlog 0b 0p requeues 152013 maxpacket 68130 drop_overlimit 0 new_flow_count 95678 ecn_mark 0 ce_mark 7639 new_flows_len 0 old_flows_len 0 [1] L4S current draft: https://datatracker.ietf.org/doc/html/draft-ietf-tsvwg-l4s-arch Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Ingemar Johansson S Cc: Tom Henderson Cc: Bob Briscoe Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ec88590b3198..6be9a84cccfa 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -840,6 +840,7 @@ enum { TCA_FQ_CODEL_CE_THRESHOLD, TCA_FQ_CODEL_DROP_BATCH_SIZE, TCA_FQ_CODEL_MEMORY_LIMIT, + TCA_FQ_CODEL_CE_THRESHOLD_ECT1, __TCA_FQ_CODEL_MAX }; -- cgit v1.2.3 From b0539f5eddc2eefd24378bda3ee9cbbca916f58d Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:51 +0200 Subject: net/smc: add netlink support for SMC-Rv2 Implement the netlink support for SMC-Rv2 related attributes that are provided to user space. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index b175bd0165a1..20f33b27787f 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -84,17 +84,28 @@ enum { SMC_NLA_SYS_IS_ISM_V2, /* u8 */ SMC_NLA_SYS_LOCAL_HOST, /* string */ SMC_NLA_SYS_SEID, /* string */ + SMC_NLA_SYS_IS_SMCR_V2, /* u8 */ __SMC_NLA_SYS_MAX, SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1 }; -/* SMC_NLA_LGR_V2 nested attributes */ +/* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */ enum { SMC_NLA_LGR_V2_VER, /* u8 */ SMC_NLA_LGR_V2_REL, /* u8 */ SMC_NLA_LGR_V2_OS, /* u8 */ SMC_NLA_LGR_V2_NEG_EID, /* string */ SMC_NLA_LGR_V2_PEER_HOST, /* string */ + __SMC_NLA_LGR_V2_MAX, + SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1 +}; + +/* SMC_NLA_LGR_R_V2 nested attributes */ +enum { + SMC_NLA_LGR_R_V2_UNSPEC, + SMC_NLA_LGR_R_V2_DIRECT, /* u8 */ + __SMC_NLA_LGR_R_V2_MAX, + SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1 }; /* SMC_GEN_LGR_SMCR attributes */ @@ -106,6 +117,8 @@ enum { SMC_NLA_LGR_R_PNETID, /* string */ SMC_NLA_LGR_R_VLAN_ID, /* u8 */ SMC_NLA_LGR_R_CONNS_NUM, /* u32 */ + SMC_NLA_LGR_R_V2_COMMON, /* nest */ + SMC_NLA_LGR_R_V2, /* nest */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; @@ -138,7 +151,7 @@ enum { SMC_NLA_LGR_D_PNETID, /* string */ SMC_NLA_LGR_D_CHID, /* u16 */ SMC_NLA_LGR_D_PAD, /* flag */ - SMC_NLA_LGR_V2, /* nest */ + SMC_NLA_LGR_D_V2_COMMON, /* nest */ __SMC_NLA_LGR_D_MAX, SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 }; -- cgit v1.2.3 From e65c26f413718ed2e6d788491adcd8cebc0f44b6 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 29 Sep 2021 12:15:58 +0900 Subject: counter: Move counter enums to uapi header This is in preparation for a subsequent patch implementing a character device interface for the Counter subsystem. Reviewed-by: David Lechner Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/962a5f2027fafcf4f77c10e1baf520463960d1ee.1632884256.git.vilhelm.gray@gmail.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/counter.h | 56 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 include/uapi/linux/counter.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h new file mode 100644 index 000000000000..6113938a6044 --- /dev/null +++ b/include/uapi/linux/counter.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Userspace ABI for Counter character devices + * Copyright (C) 2020 William Breathitt Gray + */ +#ifndef _UAPI_COUNTER_H_ +#define _UAPI_COUNTER_H_ + +/* Component scope definitions */ +enum counter_scope { + COUNTER_SCOPE_DEVICE, + COUNTER_SCOPE_SIGNAL, + COUNTER_SCOPE_COUNT, +}; + +/* Count direction values */ +enum counter_count_direction { + COUNTER_COUNT_DIRECTION_FORWARD, + COUNTER_COUNT_DIRECTION_BACKWARD, +}; + +/* Count mode values */ +enum counter_count_mode { + COUNTER_COUNT_MODE_NORMAL, + COUNTER_COUNT_MODE_RANGE_LIMIT, + COUNTER_COUNT_MODE_NON_RECYCLE, + COUNTER_COUNT_MODE_MODULO_N, +}; + +/* Count function values */ +enum counter_function { + COUNTER_FUNCTION_INCREASE, + COUNTER_FUNCTION_DECREASE, + COUNTER_FUNCTION_PULSE_DIRECTION, + COUNTER_FUNCTION_QUADRATURE_X1_A, + COUNTER_FUNCTION_QUADRATURE_X1_B, + COUNTER_FUNCTION_QUADRATURE_X2_A, + COUNTER_FUNCTION_QUADRATURE_X2_B, + COUNTER_FUNCTION_QUADRATURE_X4, +}; + +/* Signal values */ +enum counter_signal_level { + COUNTER_SIGNAL_LEVEL_LOW, + COUNTER_SIGNAL_LEVEL_HIGH, +}; + +/* Action mode values */ +enum counter_synapse_action { + COUNTER_SYNAPSE_ACTION_NONE, + COUNTER_SYNAPSE_ACTION_RISING_EDGE, + COUNTER_SYNAPSE_ACTION_FALLING_EDGE, + COUNTER_SYNAPSE_ACTION_BOTH_EDGES, +}; + +#endif /* _UAPI_COUNTER_H_ */ -- cgit v1.2.3 From b6c50affda5957a3629b149a91c7f6688ffce7f7 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 29 Sep 2021 12:15:59 +0900 Subject: counter: Add character device interface This patch introduces a character device interface for the Counter subsystem. Device data is exposed through standard character device read operations. Device data is gathered when a Counter event is pushed by the respective Counter device driver. Configuration is handled via ioctl operations on the respective Counter character device node. Cc: David Lechner Cc: Gwendal Grignou Cc: Dan Carpenter Cc: Oleksij Rempel Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/b8b8c64b4065aedff43699ad1f0e2f8d1419c15b.1632884256.git.vilhelm.gray@gmail.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/counter.h | 98 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index 6113938a6044..d0aa95aeff7b 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -6,6 +6,19 @@ #ifndef _UAPI_COUNTER_H_ #define _UAPI_COUNTER_H_ +#include +#include + +/* Component type definitions */ +enum counter_component_type { + COUNTER_COMPONENT_NONE, + COUNTER_COMPONENT_SIGNAL, + COUNTER_COMPONENT_COUNT, + COUNTER_COMPONENT_FUNCTION, + COUNTER_COMPONENT_SYNAPSE_ACTION, + COUNTER_COMPONENT_EXTENSION, +}; + /* Component scope definitions */ enum counter_scope { COUNTER_SCOPE_DEVICE, @@ -13,6 +26,91 @@ enum counter_scope { COUNTER_SCOPE_COUNT, }; +/** + * struct counter_component - Counter component identification + * @type: component type (one of enum counter_component_type) + * @scope: component scope (one of enum counter_scope) + * @parent: parent ID (matching the ID suffix of the respective parent sysfs + * path as described by the ABI documentation file + * Documentation/ABI/testing/sysfs-bus-counter) + * @id: component ID (matching the ID provided by the respective *_component_id + * sysfs attribute of the desired component) + * + * For example, if the Count 2 ceiling extension of Counter device 4 is desired, + * set type equal to COUNTER_COMPONENT_EXTENSION, scope equal to + * COUNTER_COUNT_SCOPE, parent equal to 2, and id equal to the value provided by + * the respective /sys/bus/counter/devices/counter4/count2/ceiling_component_id + * sysfs attribute. + */ +struct counter_component { + __u8 type; + __u8 scope; + __u8 parent; + __u8 id; +}; + +/* Event type definitions */ +enum counter_event_type { + /* Count value increased past ceiling */ + COUNTER_EVENT_OVERFLOW, + /* Count value decreased past floor */ + COUNTER_EVENT_UNDERFLOW, + /* Count value increased past ceiling, or decreased past floor */ + COUNTER_EVENT_OVERFLOW_UNDERFLOW, + /* Count value reached threshold */ + COUNTER_EVENT_THRESHOLD, + /* Index signal detected */ + COUNTER_EVENT_INDEX, +}; + +/** + * struct counter_watch - Counter component watch configuration + * @component: component to watch when event triggers + * @event: event that triggers (one of enum counter_event_type) + * @channel: event channel (typically 0 unless the device supports concurrent + * events of the same type) + */ +struct counter_watch { + struct counter_component component; + __u8 event; + __u8 channel; +}; + +/* + * Queues a Counter watch for the specified event. + * + * The queued watches will not be applied until COUNTER_ENABLE_EVENTS_IOCTL is + * called. + */ +#define COUNTER_ADD_WATCH_IOCTL _IOW(0x3E, 0x00, struct counter_watch) +/* + * Enables monitoring the events specified by the Counter watches that were + * queued by COUNTER_ADD_WATCH_IOCTL. + * + * If events are already enabled, the new set of watches replaces the old one. + * Calling this ioctl also has the effect of clearing the queue of watches added + * by COUNTER_ADD_WATCH_IOCTL. + */ +#define COUNTER_ENABLE_EVENTS_IOCTL _IO(0x3E, 0x01) +/* + * Stops monitoring the previously enabled events. + */ +#define COUNTER_DISABLE_EVENTS_IOCTL _IO(0x3E, 0x02) + +/** + * struct counter_event - Counter event data + * @timestamp: best estimate of time of event occurrence, in nanoseconds + * @value: component value + * @watch: component watch configuration + * @status: return status (system error number) + */ +struct counter_event { + __aligned_u64 timestamp; + __aligned_u64 value; + struct counter_watch watch; + __u8 status; +}; + /* Count direction values */ enum counter_count_direction { COUNTER_COUNT_DIRECTION_FORWARD, -- cgit v1.2.3 From b416beb25d9317499c823aea1522eefd8d72ea36 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 18 Oct 2021 11:29:34 +0800 Subject: mctp: unify sockaddr_mctp types Use the more precise __kernel_sa_family_t for smctp_family, to match struct sockaddr. Also, use an unsigned int for the network member; negative networks don't make much sense. We're already using unsigned for mctp_dev and mctp_skb_cb, but need to change mctp_sock to suit. Fixes: 60fc63981693 ("mctp: Add sockaddr_mctp to uapi") Signed-off-by: Jeremy Kerr Acked-by: Eugene Syromiatnikov Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 52b54d13f385..f384962d8ff2 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -10,6 +10,7 @@ #define __UAPI_MCTP_H #include +#include typedef __u8 mctp_eid_t; @@ -18,8 +19,8 @@ struct mctp_addr { }; struct sockaddr_mctp { - unsigned short int smctp_family; - int smctp_network; + __kernel_sa_family_t smctp_family; + unsigned int smctp_network; struct mctp_addr smctp_addr; __u8 smctp_type; __u8 smctp_tag; -- cgit v1.2.3 From 5a20dd46b8b8459382685969cf0f196ae9fb9766 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 18 Oct 2021 11:29:35 +0800 Subject: mctp: Be explicit about struct sockaddr_mctp padding We currently have some implicit padding in struct sockaddr_mctp. This patch makes this padding explicit, and ensures we have consistent layout on platforms with <32bit alignmnent. Fixes: 60fc63981693 ("mctp: Add sockaddr_mctp to uapi") Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index f384962d8ff2..6acd4ccafbf7 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -20,10 +20,12 @@ struct mctp_addr { struct sockaddr_mctp { __kernel_sa_family_t smctp_family; + __u16 __smctp_pad0; unsigned int smctp_network; struct mctp_addr smctp_addr; __u8 smctp_type; __u8 smctp_tag; + __u8 __smctp_pad1; }; #define MCTP_NET_ANY 0x0 -- cgit v1.2.3 From 7bbbbfaa7a1b0b03890f25fba5f28bb8c7ef145a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:37:56 +0200 Subject: ether: add EtherType for proprietary Realtek protocols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new EtherType ETH_P_REALTEK to the if_ether.h uapi header. The EtherType 0x8899 is used in a number of different protocols from Realtek Semiconductor Corp [1], so no general assumptions should be made when trying to decode such packets. Observed protocols include: 0x1 - Realtek Remote Control protocol [2] 0x2 - Echo protocol [2] 0x3 - Loop detection protocol [2] 0x4 - RTL8365MB 4- and 8-byte switch CPU tag protocols [3] 0x9 - RTL8306 switch CPU tag protocol [4] 0xA - RTL8366RB switch CPU tag protocol [4] [1] https://lore.kernel.org/netdev/CACRpkdYQthFgjwVzHyK3DeYUOdcYyWmdjDPG=Rf9B3VrJ12Rzg@mail.gmail.com/ [2] https://www.wireshark.org/lists/ethereal-dev/200409/msg00090.html [3] https://lore.kernel.org/netdev/20210822193145.1312668-4-alvin@pqrs.dk/ [4] https://lore.kernel.org/netdev/20200708122537.1341307-2-linus.walleij@linaro.org/ Suggested-by: Andrew Lunn Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5f589c7a8382..5da4ee234e0b 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -86,6 +86,7 @@ * over Ethernet */ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ -- cgit v1.2.3 From 917425f71f36ce6f61841497040e10d0166106d8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:27 +0200 Subject: rtc: add alarm related features Add more alarm related features to be declared by drivers. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-2-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index f950bff75e97..f4037c541925 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -114,7 +114,9 @@ struct rtc_pll_info { #define RTC_FEATURE_ALARM 0 #define RTC_FEATURE_ALARM_RES_MINUTE 1 #define RTC_FEATURE_NEED_WEEK_DAY 2 -#define RTC_FEATURE_CNT 3 +#define RTC_FEATURE_ALARM_RES_2S 3 +#define RTC_FEATURE_UPDATE_INTERRUPT 4 +#define RTC_FEATURE_CNT 5 #define RTC_MAX_FREQ 8192 -- cgit v1.2.3 From 6a8af1b6568ad9ee08a419fb12c793f7992cf8a4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:28 +0200 Subject: rtc: add parameter ioctl Add an ioctl allowing to get and set extra parameters for an RTC. For now, only handle getting available features. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-3-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index f4037c541925..3241f9ecc639 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -14,6 +14,7 @@ #include #include +#include /* * The struct used to pass data via the following ioctl. Similar to the @@ -66,6 +67,17 @@ struct rtc_pll_info { long pll_clock; /* base PLL frequency */ }; +struct rtc_param { + __u64 param; + union { + __u64 uvalue; + __s64 svalue; + __u64 ptr; + }; + __u32 index; + __u32 __pad; +}; + /* * ioctl calls that are permitted to the /dev/rtc interface, if * any of the RTC drivers are enabled. @@ -95,6 +107,9 @@ struct rtc_pll_info { #define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */ #define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ +#define RTC_PARAM_GET _IOW('p', 0x13, struct rtc_param) /* Get parameter */ +#define RTC_PARAM_SET _IOW('p', 0x14, struct rtc_param) /* Set parameter */ + #define RTC_VL_DATA_INVALID _BITUL(0) /* Voltage too low, RTC data is invalid */ #define RTC_VL_BACKUP_LOW _BITUL(1) /* Backup voltage is low */ #define RTC_VL_BACKUP_EMPTY _BITUL(2) /* Backup empty or not present */ @@ -118,6 +133,9 @@ struct rtc_pll_info { #define RTC_FEATURE_UPDATE_INTERRUPT 4 #define RTC_FEATURE_CNT 5 +/* parameter list */ +#define RTC_PARAM_FEATURES 0 + #define RTC_MAX_FREQ 8192 -- cgit v1.2.3 From 2268551935dbf1abcbb4d4fb7b1ad74dbe0d1be0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:29 +0200 Subject: rtc: expose correction feature Add a new feature for RTCs able to correct the oscillator imprecision. This is also called offset or trimming. Such drivers have a .set_offset callback, use that to set the feature bit from the core. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-4-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 3241f9ecc639..c83bb9a4fa4f 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -131,7 +131,8 @@ struct rtc_param { #define RTC_FEATURE_NEED_WEEK_DAY 2 #define RTC_FEATURE_ALARM_RES_2S 3 #define RTC_FEATURE_UPDATE_INTERRUPT 4 -#define RTC_FEATURE_CNT 5 +#define RTC_FEATURE_CORRECTION 5 +#define RTC_FEATURE_CNT 6 /* parameter list */ #define RTC_PARAM_FEATURES 0 -- cgit v1.2.3 From a6d8c6e1a5c6fb982964861dc84c0c7cb0151c7c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:30 +0200 Subject: rtc: add correction parameter Add a new parameter allowing the get and set the correction using ioctls instead of just sysfs. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-5-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index c83bb9a4fa4f..5debe82439c2 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -136,6 +136,7 @@ struct rtc_param { /* parameter list */ #define RTC_PARAM_FEATURES 0 +#define RTC_PARAM_CORRECTION 1 #define RTC_MAX_FREQ 8192 -- cgit v1.2.3 From 0d20e9fb1262b1f9ac895b287db892bc75b05b84 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:31 +0200 Subject: rtc: add BSM parameter BSM or Backup Switch Mode is a common feature on RTCs, allowing to select how the RTC will decide when to switch from its primary power supply to the backup power supply. It is necessary to be able to set it from userspace as there are uses cases where it has to be done dynamically. Supported values are: RTC_BSM_DISABLED: disabled RTC_BSM_DIRECT: switching will happen as soon as Vbackup > Vdd RTC_BSM_LEVEL: switching will happen around a threshold, usually with an hysteresis RTC_BSM_STANDBY: switching will not happen until Vdd > Vbackup, this is useful to ensure the RTC doesn't draw any power until the device is first powered on. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-6-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 5debe82439c2..03e5b776e597 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -132,11 +132,18 @@ struct rtc_param { #define RTC_FEATURE_ALARM_RES_2S 3 #define RTC_FEATURE_UPDATE_INTERRUPT 4 #define RTC_FEATURE_CORRECTION 5 -#define RTC_FEATURE_CNT 6 +#define RTC_FEATURE_BACKUP_SWITCH_MODE 6 +#define RTC_FEATURE_CNT 7 /* parameter list */ #define RTC_PARAM_FEATURES 0 #define RTC_PARAM_CORRECTION 1 +#define RTC_PARAM_BACKUP_SWITCH_MODE 2 + +#define RTC_BSM_DISABLED 0 +#define RTC_BSM_DIRECT 1 +#define RTC_BSM_LEVEL 2 +#define RTC_BSM_STANDBY 3 #define RTC_MAX_FREQ 8192 -- cgit v1.2.3 From c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 16 Sep 2021 18:15:35 +0000 Subject: KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK Handling the migration of TSCs correctly is difficult, in part because Linux does not provide userspace with the ability to retrieve a (TSC, realtime) clock pair for a single instant in time. In lieu of a more convenient facility, KVM can report similar information in the kvm_clock structure. Provide userspace with a host TSC & realtime pair iff the realtime clock is based on the TSC. If userspace provides KVM_SET_CLOCK with a valid realtime value, advance the KVM clock by the amount of elapsed time. Do not step the KVM clock backwards, though, as it is a monotonic oscillator. Suggested-by: Paolo Bonzini Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Message-Id: <20210916181538.968978-5-oupton@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 322b4b588d75..5ca5ffe16cb4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1231,11 +1231,16 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 +#define KVM_CLOCK_REALTIME (1 << 2) +#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad[9]; + __u32 pad0; + __u64 realtime; + __u64 host_tsc; + __u32 pad[4]; }; /* For KVM_CAP_SW_TLB */ -- cgit v1.2.3 From 3080ea5553cc909b000d1f1d964a9041962f2c5b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 Aug 2021 11:21:23 -0700 Subject: stddef: Introduce DECLARE_FLEX_ARRAY() helper There are many places where kernel code wants to have several different typed trailing flexible arrays. This would normally be done with multiple flexible arrays in a union, but since GCC and Clang don't (on the surface) allow this, there have been many open-coded workarounds, usually involving neighboring 0-element arrays at the end of a structure. For example, instead of something like this: struct thing { ... union { struct type1 foo[]; struct type2 bar[]; }; }; code works around the compiler with: struct thing { ... struct type1 foo[0]; struct type2 bar[]; }; Another case is when a flexible array is wanted as the single member within a struct (which itself is usually in a union). For example, this would be worked around as: union many { ... struct { struct type3 baz[0]; }; }; These kinds of work-arounds cause problems with size checks against such zero-element arrays (for example when building with -Warray-bounds and -Wzero-length-bounds, and with the coming FORTIFY_SOURCE improvements), so they must all be converted to "real" flexible arrays, avoiding warnings like this: fs/hpfs/anode.c: In function 'hpfs_add_sector_to_btree': fs/hpfs/anode.c:209:27: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct bplus_internal_node[0]' [-Wzero-length-bounds] 209 | anode->btree.u.internal[0].down = cpu_to_le32(a); | ~~~~~~~~~~~~~~~~~~~~~~~^~~ In file included from fs/hpfs/hpfs_fn.h:26, from fs/hpfs/anode.c:10: fs/hpfs/hpfs.h:412:32: note: while referencing 'internal' 412 | struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving | ^~~~~~~~ drivers/net/can/usb/etas_es58x/es58x_fd.c: In function 'es58x_fd_tx_can_msg': drivers/net/can/usb/etas_es58x/es58x_fd.c:360:35: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[]'} [-Wzero-length-bounds] 360 | tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len]; | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/net/can/usb/etas_es58x/es58x_core.h:22, from drivers/net/can/usb/etas_es58x/es58x_fd.c:17: drivers/net/can/usb/etas_es58x/es58x_fd.h:231:6: note: while referencing 'raw_msg' 231 | u8 raw_msg[0]; | ^~~~~~~ However, it _is_ entirely possible to have one or more flexible arrays in a struct or union: it just has to be in another struct. And since it cannot be alone in a struct, such a struct must have at least 1 other named member -- but that member can be zero sized. Wrap all this nonsense into the new DECLARE_FLEX_ARRAY() in support of having flexible arrays in unions (or alone in a struct). As with struct_group(), since this is needed in UAPI headers as well, implement the core there, with a non-UAPI wrapper. Additionally update kernel-doc to understand its existence. https://github.com/KSPP/linux/issues/137 Cc: Arnd Bergmann Cc: "Gustavo A. R. Silva" Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 610204f7c275..3021ea25a284 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -25,3 +25,19 @@ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } -- cgit v1.2.3 From 47c662486cccf03e7062139d069b07ab0126ef59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Aug 2021 12:19:24 -0700 Subject: treewide: Replace 0-element memcpy() destinations with flexible arrays The 0-element arrays that are used as memcpy() destinations are actually flexible arrays. Adjust their structures accordingly so that memcpy() can better reason able their destination size (i.e. they need to be seen as "unknown" length rather than "zero"). In some cases, use of the DECLARE_FLEX_ARRAY() helper is needed when a flexible array is alone in a struct. Cc: "Gustavo A. R. Silva" Cc: Arnd Bergmann Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Nilesh Javali Cc: Manish Rangankar Cc: GR-QLogic-Storage-Upstream@marvell.com Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Larry Finger Cc: Phillip Potter Cc: Greg Kroah-Hartman Cc: Florian Schilhabel Cc: Johannes Berg Cc: Christophe JAILLET Cc: Fabio Aiuto Cc: Ross Schmidt Cc: Marco Cesati Cc: ath10k@lists.infradead.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: linux-staging@lists.linux.dev Signed-off-by: Kees Cook --- include/uapi/linux/dlm_device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h index f880d2831160..e83954c69fff 100644 --- a/include/uapi/linux/dlm_device.h +++ b/include/uapi/linux/dlm_device.h @@ -45,13 +45,13 @@ struct dlm_lock_params { void __user *bastaddr; struct dlm_lksb __user *lksb; char lvb[DLM_USER_LVB_LEN]; - char name[0]; + char name[]; }; struct dlm_lspace_params { __u32 flags; __u32 minor; - char name[0]; + char name[]; }; struct dlm_purge_params { -- cgit v1.2.3 From 223f903e9c832699f4e5f422281a60756c1c6cfe Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 Oct 2021 09:48:38 -0700 Subject: bpf: Rename BTF_KIND_TAG to BTF_KIND_DECL_TAG Patch set [1] introduced BTF_KIND_TAG to allow tagging declarations for struct/union, struct/union field, var, func and func arguments and these tags will be encoded into dwarf. They are also encoded to btf by llvm for the bpf target. After BTF_KIND_TAG is introduced, we intended to use it for kernel __user attributes. But kernel __user is actually a type attribute. Upstream and internal discussion showed it is not a good idea to mix declaration attribute and type attribute. So we proposed to introduce btf_type_tag as a type attribute and existing btf_tag renamed to btf_decl_tag ([2]). This patch renamed BTF_KIND_TAG to BTF_KIND_DECL_TAG and some other declarations with *_tag to *_decl_tag to make it clear the tag is for declaration. In the future, BTF_KIND_TYPE_TAG might be introduced per [3]. [1] https://lore.kernel.org/bpf/20210914223004.244411-1-yhs@fb.com/ [2] https://reviews.llvm.org/D111588 [3] https://reviews.llvm.org/D111199 Fixes: b5ea834dde6b ("bpf: Support for new btf kind BTF_KIND_TAG") Fixes: 5b84bd10363e ("libbpf: Add support for BTF_KIND_TAG") Fixes: 5c07f2fec003 ("bpftool: Add support for BTF_KIND_TAG") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211012164838.3345699-1-yhs@fb.com --- include/uapi/linux/btf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 642b6ecb37d7..deb12f755f0f 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and TAG. + * FUNC, FUNC_PROTO, VAR and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -74,7 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ - BTF_KIND_TAG = 17, /* Tag */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -174,14 +174,14 @@ struct btf_var_secinfo { __u32 size; }; -/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe * additional information related to the tag applied location. * If component_idx == -1, the tag is applied to a struct, union, * variable or function. Otherwise, it is applied to a struct/union * member or a func argument, and component_idx indicates which member * or argument (0 ... vlen-1). */ -struct btf_tag { +struct btf_decl_tag { __s32 component_idx; }; -- cgit v1.2.3 From 6224590d242fc8c6b26941328e02a40b4384949b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 2 Oct 2021 19:36:14 +0100 Subject: io_uring: add flag to not fail link after timeout For some reason non-off IORING_OP_TIMEOUT always fails links, it's pretty inconvenient and unnecessary limits chaining after it to hard linking, which is far from ideal, e.g. doesn't pair well with timeout cancellation. Add a flag forcing it to not fail links on -ETIME. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/17c7ec0fb7a6113cc6be8cdaedcada0ba836ac0e.1633199723.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b270a07b285e..c45b5e9a9387 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -158,6 +158,7 @@ enum { #define IORING_TIMEOUT_BOOTTIME (1U << 2) #define IORING_TIMEOUT_REALTIME (1U << 3) #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) +#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* -- cgit v1.2.3 From f4c6217f7f5936f7173d028559ff5d25cce10816 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 6 Oct 2021 19:36:51 +0530 Subject: perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line Add a comment about PERF_MEM_LVL_* namespace being depricated to some extent in favour of added PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. Remove an extra line present in perf_mem__lvl_scnprintf function. Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211006140654.298352-2-kjain@linux.ibm.com --- include/uapi/linux/perf_event.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c89535de1ec8..a74538c5b4b5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1256,7 +1256,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. + * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ -- cgit v1.2.3 From fec9cc6175d0ec1e13efe12be491d9bd4de62f80 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 6 Oct 2021 19:36:52 +0530 Subject: perf: Add mem_hops field in perf_mem_data_src structure Going forward, future generation systems can have more hierarchy within the node/package level but currently we don't have any data source encoding field in perf, which can be used to represent this level of data. Add a new field called 'mem_hops' in the perf_mem_data_src structure which can be used to represent intra-node/package or inter-node/off-package details. This field is of size 3 bits where PERF_MEM_HOPS_{NA, 0..6} value can be used to present different hop levels data. Also add corresponding macros to define mem_hop field values and shift value. Currently we define macro for HOPS_0 which corresponds to data coming from another core but same node. For ex: Encodings for mem_hops fields with L2 cache: L2 - local L2 L2 | REMOTE | HOPS_0 - remote core, same node L2 Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211006140654.298352-3-kjain@linux.ibm.com --- include/uapi/linux/perf_event.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index a74538c5b4b5..bd8860eeb291 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1225,14 +1225,16 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_rsvd:21; + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:21, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1328,6 +1330,11 @@ union perf_mem_data_src { #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +/* 2-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) -- cgit v1.2.3 From 0a2b3e363566c4cc8792d37c5e73b9d9295e075c Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 20 Oct 2021 22:38:06 +0800 Subject: bcache: reserve never used bits from bkey.high There sre 3 bits in member high of struct bkey are never used, and no plan to support them in future, - HEADER_SIZE, start at bit 58, length 2 bits - KEY_PINNED, start at bit 55, length 1 bit No any kernel code, or user space tool references or accesses the three bits. Therefore it is possible and feasible to reserve the valuable bits from bkey.high. They can be used in future for other purpose. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-3-colyli@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/bcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index cf7399f03b71..97413586195b 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -43,9 +43,9 @@ static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ #define KEY_MAX_U64S 8 KEY_FIELD(KEY_PTRS, high, 60, 3) -KEY_FIELD(HEADER_SIZE, high, 58, 2) +KEY_FIELD(__PAD0, high, 58, 2) KEY_FIELD(KEY_CSUM, high, 56, 2) -KEY_FIELD(KEY_PINNED, high, 55, 1) +KEY_FIELD(__PAD1, high, 55, 1) KEY_FIELD(KEY_DIRTY, high, 36, 1) KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) -- cgit v1.2.3 From 7aea2c0b48a568e6732c1d30516febe36bf555f1 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Wed, 8 Sep 2021 14:03:15 +0100 Subject: media: allegro: add control to disable encoder buffer The encoder buffer can have a negative impact on the quality of the encoded video. Add a control to allow user space to disable the encoder buffer per channel if the VCU supports the encoder buffer but the quality is not sufficient. Signed-off-by: Michael Tretter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 133e20444939..5fea5feb0412 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -211,6 +211,11 @@ enum v4l2_colorfx { * We reserve 128 controls for this driver. */ #define V4L2_CID_USER_CCS_BASE (V4L2_CID_USER_BASE + 0x10f0) +/* + * The base for Allegro driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_ALLEGRO_BASE (V4L2_CID_USER_BASE + 0x1170) /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls -- cgit v1.2.3 From dfcb63ce1de6b10ba98dee928f9463f37e5a5512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 19 Oct 2021 19:47:09 +0200 Subject: fq_codel: generalise ce_threshold marking for subset of traffic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e72aeb9ee0e3 ("fq_codel: implement L4S style ce_threshold_ect1 marking") expanded the ce_threshold feature of FQ-CoDel so it can be applied to a subset of the traffic, using the ECT(1) bit of the ECN field as the classifier. However, hard-coding ECT(1) as the only classifier for this feature seems limiting, so let's expand it to be more general. To this end, change the parameter from a ce_threshold_ect1 boolean, to a one-byte selector/mask pair (ce_threshold_{selector,mask}) which is applied to the whole diffserv/ECN field in the IP header. This makes it possible to classify packets by any value in either the ECN field or the diffserv field. In particular, setting a selector of INET_ECN_ECT_1 and a mask of INET_ECN_MASK corresponds to the functionality before this patch, and a mask of ~INET_ECN_MASK allows using the selector as a straight-forward match against a diffserv code point: # apply ce_threshold to ECT(1) traffic tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x1/0x3 # apply ce_threshold to ECN-capable traffic marked as diffserv AF22 tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x50/0xfc Regardless of the selector chosen, the normal rules for ECN-marking of packets still apply, i.e., the flow must still declare itself ECN-capable by setting one of the bits in the ECN field to get marked at all. v2: - Add tc usage examples to patch description Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211019174709.69081-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 6be9a84cccfa..f292b467b27f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -840,7 +840,8 @@ enum { TCA_FQ_CODEL_CE_THRESHOLD, TCA_FQ_CODEL_DROP_BATCH_SIZE, TCA_FQ_CODEL_MEMORY_LIMIT, - TCA_FQ_CODEL_CE_THRESHOLD_ECT1, + TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, + TCA_FQ_CODEL_CE_THRESHOLD_MASK, __TCA_FQ_CODEL_MAX }; -- cgit v1.2.3 From 1add667da24273631d4b1f5529789ec5253227f6 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 20 Oct 2021 08:11:47 +0300 Subject: nl80211: vendor-cmd: intel: add more details for IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP Explain more the expected flow for this command. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20211020051147.29297-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211-vnd-intel.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h index 0bf177b84fd9..4ed7d0b24512 100644 --- a/include/uapi/linux/nl80211-vnd-intel.h +++ b/include/uapi/linux/nl80211-vnd-intel.h @@ -13,6 +13,35 @@ * enum iwl_mvm_vendor_cmd - supported vendor commands * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info. * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device. + * This is useful when the CSME firmware owns the device and the kernel + * wants to use it. In case the CSME firmware has no connection active the + * kernel will manage on its own to get ownership of the device. + * When the CSME firmware has an active connection, the user space + * involvement is required. The kernel will assert the RFKILL signal with + * the "device not owned" reason so that nobody can touch the device. Then + * the user space can run the following flow to be able to get connected + * to the very same AP the CSME firmware is currently connected to: + * + * 1) The user space (NetworkManager) boots and sees that the device is + * in RFKILL because the host doesn't own the device + * 2) The user space asks the kernel what AP the CSME firmware is + * connected to (with %IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO) + * 3) The user space checks if it has a profile that matches the reply + * from the CSME firmware + * 4) The user space installs a network to the wpa_supplicant with a + * specific BSSID and a specific frequency + * 5) The user space prevents any type of full scan + * 6) The user space asks iwlmei to request ownership on the device (with + * this command) + * 7) iwlmei requests ownership from the CSME firmware + * 8) The CSME firmware grants ownership + * 9) iwlmei tells iwlwifi to lift the RFKILL + * 10) RFKILL OFF is reported to user space + * 11) The host boots the device, loads the firwmare, and connects to a + * specific BSSID without scanning including IP as fast as it can + * 12) The host reports to the CSME firmware that there is a connection + * 13) The TCP connection is preserved and the host has connectivity + * * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed. * It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a * &IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes. -- cgit v1.2.3 From 63fa04266629b9559d66c4dc18b03e0f9fc04a02 Mon Sep 17 00:00:00 2001 From: Srinivasan Raju Date: Mon, 18 Oct 2021 11:00:54 +0100 Subject: nl80211: Add LC placeholder band definition to nl80211_band Define LC band which is a draft under IEEE 802.11bb. Current NL80211_BAND_LC is a placeholder band and will be more defined IEEE 802.11bb progresses. Signed-off-by: Srinivasan Raju Link: https://lore.kernel.org/r/20211018100143.7565-2-srini.raju@purelifi.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index eda608b1eb09..6b816ef0155f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4978,6 +4978,7 @@ enum nl80211_txrate_gi { * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz) * @NL80211_BAND_S1GHZ: around 900MHz, supported by S1G PHYs + * @NL80211_BAND_LC: light communication band (placeholder) * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace * since newer kernel versions may support more bands */ @@ -4987,6 +4988,7 @@ enum nl80211_band { NL80211_BAND_60GHZ, NL80211_BAND_6GHZ, NL80211_BAND_S1GHZ, + NL80211_BAND_LC, NUM_NL80211_BANDS, }; -- cgit v1.2.3 From f9d366d420af4ce8719c59e60853573c02831f61 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Oct 2021 17:30:39 +0200 Subject: cfg80211: fix kernel-doc for MBSSID EMA The struct member ema_max_profile_periodicity was listed with the wrong name in the kernel-doc, fix that. Link: https://lore.kernel.org/r/20211021173038.18ec2030c66b.Iac731bb299525940948adad2c41f514b7dd81c47@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6b816ef0155f..61cab81e920d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -7424,7 +7424,7 @@ enum nl80211_sar_specs_attrs { * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel * to advertise the maximum profile periodicity supported by the driver * if EMA is enabled. Driver should indicate EMA support to the userspace - * by setting wiphy->mbssid_max_ema_profile_periodicity to + * by setting wiphy->ema_max_profile_periodicity to * a non-zero value. * * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of @@ -7443,7 +7443,7 @@ enum nl80211_sar_specs_attrs { * * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature. * Setting this flag is permitted only if the driver advertises EMA support - * by setting wiphy->mbssid_max_ema_profile_periodicity to non-zero. + * by setting wiphy->ema_max_profile_periodicity to non-zero. * * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute -- cgit v1.2.3 From c353d7ce76bfbb87a89155616916cea1f0f1e78e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 19 Oct 2021 16:45:00 +0200 Subject: uapi: Add Add a header file providing translation primitives and tables for the conversion of (ASCII) characters to a 14-segments notation, as used by 14-segment alphanumeric displays. This follows the spirit of include/uapi/linux/map_to_7segment.h. Signed-off-by: Geert Uytterhoeven Signed-off-by: Miguel Ojeda --- include/uapi/linux/map_to_14segment.h | 241 ++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 include/uapi/linux/map_to_14segment.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/map_to_14segment.h b/include/uapi/linux/map_to_14segment.h new file mode 100644 index 000000000000..0346ef76543b --- /dev/null +++ b/include/uapi/linux/map_to_14segment.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (C) 2021 Glider bv + * + * Based on include/uapi/linux/map_to_7segment.h: + + * Copyright (c) 2005 Henk Vergonet + */ + +#ifndef MAP_TO_14SEGMENT_H +#define MAP_TO_14SEGMENT_H + +/* This file provides translation primitives and tables for the conversion + * of (ASCII) characters to a 14-segments notation. + * + * The 14 segment's wikipedia notation below is used as standard. + * See: https://en.wikipedia.org/wiki/Fourteen-segment_display + * + * Notation: +---a---+ + * |\ | /| + * f h i j b + * | \|/ | + * +-g1+-g2+ + * | /|\ | + * e k l m c + * |/ | \| + * +---d---+ + * + * Usage: + * + * Register a map variable, and fill it with a character set: + * static SEG14_DEFAULT_MAP(map_seg14); + * + * + * Then use for conversion: + * seg14 = map_to_seg14(&map_seg14, some_char); + * ... + * + * In device drivers it is recommended, if required, to make the char map + * accessible via the sysfs interface using the following scheme: + * + * static ssize_t map_seg14_show(struct device *dev, + * struct device_attribute *attr, char *buf) + * { + * memcpy(buf, &map_seg14, sizeof(map_seg14)); + * return sizeof(map_seg14); + * } + * static ssize_t map_seg14_store(struct device *dev, + * struct device_attribute *attr, + * const char *buf, size_t cnt) + * { + * if (cnt != sizeof(map_seg14)) + * return -EINVAL; + * memcpy(&map_seg14, buf, cnt); + * return cnt; + * } + * static DEVICE_ATTR_RW(map_seg14); + */ +#include +#include + +#include + +#define BIT_SEG14_A 0 +#define BIT_SEG14_B 1 +#define BIT_SEG14_C 2 +#define BIT_SEG14_D 3 +#define BIT_SEG14_E 4 +#define BIT_SEG14_F 5 +#define BIT_SEG14_G1 6 +#define BIT_SEG14_G2 7 +#define BIT_SEG14_H 8 +#define BIT_SEG14_I 9 +#define BIT_SEG14_J 10 +#define BIT_SEG14_K 11 +#define BIT_SEG14_L 12 +#define BIT_SEG14_M 13 +#define BIT_SEG14_RESERVED1 14 +#define BIT_SEG14_RESERVED2 15 + +struct seg14_conversion_map { + __be16 table[128]; +}; + +static __inline__ int map_to_seg14(struct seg14_conversion_map *map, int c) +{ + if (c < 0 || c >= sizeof(map->table) / sizeof(map->table[0])) + return -EINVAL; + + return __be16_to_cpu(map->table[c]); +} + +#define SEG14_CONVERSION_MAP(_name, _map) \ + struct seg14_conversion_map _name = { .table = { _map } } + +/* + * It is recommended to use a facility that allows user space to redefine + * custom character sets for LCD devices. Please use a sysfs interface + * as described above. + */ +#define MAP_TO_SEG14_SYSFS_FILE "map_seg14" + +/******************************************************************************* + * ASCII conversion table + ******************************************************************************/ + +#define _SEG14(sym, a, b, c, d, e, f, g1, g2, h, j, k, l, m, n) \ + __cpu_to_be16( a << BIT_SEG14_A | b << BIT_SEG14_B | \ + c << BIT_SEG14_C | d << BIT_SEG14_D | \ + e << BIT_SEG14_E | f << BIT_SEG14_F | \ + g1 << BIT_SEG14_G1 | g2 << BIT_SEG14_G2 | \ + h << BIT_SEG14_H | j << BIT_SEG14_I | \ + k << BIT_SEG14_J | l << BIT_SEG14_K | \ + m << BIT_SEG14_L | n << BIT_SEG14_M ) + +#define _MAP_0_32_ASCII_SEG14_NON_PRINTABLE \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + +#define _MAP_33_47_ASCII_SEG14_SYMBOL \ + _SEG14('!', 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('"', 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), \ + _SEG14('#', 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0), \ + _SEG14('$', 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0), \ + _SEG14('%', 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0), \ + _SEG14('&', 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1), \ + _SEG14('\'',0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), \ + _SEG14('(', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1), \ + _SEG14(')', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0), \ + _SEG14('*', 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1), \ + _SEG14('+', 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0), \ + _SEG14(',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), \ + _SEG14('-', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('.', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), \ + _SEG14('/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0), + +#define _MAP_48_57_ASCII_SEG14_NUMERIC \ + _SEG14('0', 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0), \ + _SEG14('1', 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), \ + _SEG14('2', 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('3', 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('4', 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('5', 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1), \ + _SEG14('6', 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('7', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0), \ + _SEG14('8', 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('9', 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0), + +#define _MAP_58_64_ASCII_SEG14_SYMBOL \ + _SEG14(':', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), \ + _SEG14(';', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0), \ + _SEG14('<', 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1), \ + _SEG14('=', 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('>', 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0), \ + _SEG14('?', 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0), \ + _SEG14('@', 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0), + +#define _MAP_65_90_ASCII_SEG14_ALPHA_UPPER \ + _SEG14('A', 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('B', 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0), \ + _SEG14('C', 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('D', 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), \ + _SEG14('E', 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('F', 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('G', 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('H', 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('I', 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), \ + _SEG14('J', 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('K', 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1), \ + _SEG14('L', 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('M', 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0), \ + _SEG14('N', 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1), \ + _SEG14('O', 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('P', 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('Q', 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1), \ + _SEG14('R', 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1), \ + _SEG14('S', 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('T', 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), \ + _SEG14('U', 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('V', 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0), \ + _SEG14('W', 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1), \ + _SEG14('X', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1), \ + _SEG14('Y', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0), \ + _SEG14('Z', 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0), + +#define _MAP_91_96_ASCII_SEG14_SYMBOL \ + _SEG14('[', 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('\\',0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1), \ + _SEG14(']', 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('^', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1), \ + _SEG14('_', 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('`', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), + +#define _MAP_97_122_ASCII_SEG14_ALPHA_LOWER \ + _SEG14('a', 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0), \ + _SEG14('b', 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1), \ + _SEG14('c', 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('d', 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0), \ + _SEG14('e', 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0), \ + _SEG14('f', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0), \ + _SEG14('g', 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0), \ + _SEG14('h', 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0), \ + _SEG14('i', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), \ + _SEG14('j', 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0), \ + _SEG14('k', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1), \ + _SEG14('l', 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('m', 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0), \ + _SEG14('n', 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0), \ + _SEG14('o', 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0), \ + _SEG14('p', 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0), \ + _SEG14('q', 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0), \ + _SEG14('r', 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('s', 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1), \ + _SEG14('t', 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('u', 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), \ + _SEG14('v', 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0), \ + _SEG14('w', 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1), \ + _SEG14('x', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1), \ + _SEG14('y', 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0), \ + _SEG14('z', 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0), + +#define _MAP_123_126_ASCII_SEG14_SYMBOL \ + _SEG14('{', 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0), \ + _SEG14('|', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), \ + _SEG14('}', 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1), \ + _SEG14('~', 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0), + +/* Maps */ +#define MAP_ASCII14SEG_ALPHANUM \ + _MAP_0_32_ASCII_SEG14_NON_PRINTABLE \ + _MAP_33_47_ASCII_SEG14_SYMBOL \ + _MAP_48_57_ASCII_SEG14_NUMERIC \ + _MAP_58_64_ASCII_SEG14_SYMBOL \ + _MAP_65_90_ASCII_SEG14_ALPHA_UPPER \ + _MAP_91_96_ASCII_SEG14_SYMBOL \ + _MAP_97_122_ASCII_SEG14_ALPHA_LOWER \ + _MAP_123_126_ASCII_SEG14_SYMBOL + +#define SEG14_DEFAULT_MAP(_name) \ + SEG14_CONVERSION_MAP(_name, MAP_ASCII14SEG_ALPHANUM) + +#endif /* MAP_TO_14SEGMENT_H */ -- cgit v1.2.3 From 9eeb3aa33ae005526f672b394c1791578463513f Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 21 Oct 2021 21:47:51 +0800 Subject: bpf: Add bpf_skc_to_unix_sock() helper The helper is used in tracing programs to cast a socket pointer to a unix_sock pointer. The return value could be NULL if the casting is illegal. Suggested-by: Yonghong Song Signed-off-by: Hengqi Chen Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211021134752.1223426-2-hengqi.chen@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937a..22e7a3f38b9f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4909,6 +4909,12 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5095,7 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From aba64c7da98330141dcdadd5612f088043a83696 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Wed, 20 Oct 2021 00:48:17 -0700 Subject: bpf: Add verified_insns to bpf_prog_info and fdinfo This stat is currently printed in the verifier log and not stored anywhere. To ease consumption of this data, add a field to bpf_prog_aux so it can be exposed via BPF_OBJ_GET_INFO_BY_FD and fdinfo. Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211020074818.1017682-2-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 22e7a3f38b9f..c10820037883 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5620,6 +5620,7 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; + __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From 63dfe0709643528290c8a6825f278eda0e3f3c2e Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 18 Sep 2021 18:56:32 +0900 Subject: can: bittiming: allow TDC{V,O} to be zero and add can_tdc_const::tdc{v,o,f}_min ISO 11898-1 specifies in section 11.3.3 "Transmitter delay compensation" that "the configuration range for [the] SSP position shall be at least 0 to 63 minimum time quanta." Because SSP = TDCV + TDCO, it means that we should allow both TDCV and TDCO to hold zero value in order to honor SSP's minimum possible value. However, current implementation assigned special meaning to TDCV and TDCO's zero values: * TDCV = 0 -> TDCV is automatically measured by the transceiver. * TDCO = 0 -> TDC is off. In order to allow for those values to really be zero and to maintain current features, we introduce two new flags: * CAN_CTRLMODE_TDC_AUTO indicates that the controller support automatic measurement of TDCV. * CAN_CTRLMODE_TDC_MANUAL indicates that the controller support manual configuration of TDCV. N.B.: current implementation failed to provide an option for the driver to indicate that only manual mode was supported. TDC is disabled if both CAN_CTRLMODE_TDC_AUTO and CAN_CTRLMODE_TDC_MANUAL flags are off, c.f. the helper function can_tdc_is_enabled() which is also introduced in this patch. Also, this patch adds three fields: tdcv_min, tdco_min and tdcf_min to struct can_tdc_const. While we are not convinced that those three fields could be anything else than zero, we can imagine that some controllers might specify a lower bound on these. Thus, those minimums are really added "just in case". Comments of struct can_tdc and can_tdc_const are updated accordingly. Finally, the changes are applied to the etas_es58x driver. Link: https://lore.kernel.org/all/20210918095637.20108-2-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index f730d443b918..004cd09a7d49 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -101,6 +101,8 @@ struct can_ctrlmode { #define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */ #define CAN_CTRLMODE_FD_NON_ISO 0x80 /* CAN FD in non-ISO mode */ #define CAN_CTRLMODE_CC_LEN8_DLC 0x100 /* Classic CAN DLC option */ +#define CAN_CTRLMODE_TDC_AUTO 0x200 /* CAN transiver automatically calculates TDCV */ +#define CAN_CTRLMODE_TDC_MANUAL 0x400 /* TDCV is manually set up by user */ /* * CAN device statistics -- cgit v1.2.3 From d99755f71a80df33b981484f0d3bb956ed15a247 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 18 Sep 2021 18:56:35 +0900 Subject: can: netlink: add interface for CAN-FD Transmitter Delay Compensation (TDC) Add the netlink interface for TDC parameters of struct can_tdc_const and can_tdc. Contrary to the can_bittiming(_const) structures for which there is just a single IFLA_CAN(_DATA)_BITTMING(_CONST) entry per structure, here, we create a nested entry IFLA_CAN_TDC. Within this nested entry, additional IFLA_CAN_TDC_TDC* entries are added for each of the TDC parameters of the newly introduced struct can_tdc_const and struct can_tdc. For struct can_tdc_const, these are: IFLA_CAN_TDC_TDCV_MIN IFLA_CAN_TDC_TDCV_MAX IFLA_CAN_TDC_TDCO_MIN IFLA_CAN_TDC_TDCO_MAX IFLA_CAN_TDC_TDCF_MIN IFLA_CAN_TDC_TDCF_MAX For struct can_tdc, these are: IFLA_CAN_TDC_TDCV IFLA_CAN_TDC_TDCO IFLA_CAN_TDC_TDCF This is done so that changes can be applied in the future to the structures without breaking the netlink interface. The TDC netlink logic works as follow: * CAN_CTRLMODE_FD is not provided: - if any TDC parameters are provided: error. - TDC parameters not provided: TDC parameters unchanged. * CAN_CTRLMODE_FD is provided and is false: - TDC is deactivated: both the structure and the CAN_CTRLMODE_TDC_{AUTO,MANUAL} flags are flushed. * CAN_CTRLMODE_FD provided and is true: - CAN_CTRLMODE_TDC_{AUTO,MANUAL} and tdc{v,o,f} not provided: call can_calc_tdco() to automatically decide whether TDC should be activated and, if so, set CAN_CTRLMODE_TDC_AUTO and uses the calculated tdco value. - CAN_CTRLMODE_TDC_AUTO and tdco provided: set CAN_CTRLMODE_TDC_AUTO and use the provided tdco value. Here, tdcv is illegal and tdcf is optional. - CAN_CTRLMODE_TDC_MANUAL and both of tdcv and tdco provided: set CAN_CTRLMODE_TDC_MANUAL and use the provided tdcv and tdco value. Here, tdcf is optional. - CAN_CTRLMODE_TDC_{AUTO,MANUAL} are mutually exclusive. Whenever one flag is turned on, the other will automatically be turned off. Providing both returns an error. - Combination other than the one listed above are illegal and will return an error. N.B. above rules mean that whenever CAN_CTRLMODE_FD is provided, the previous TDC values will be overwritten. The only option to reuse previous TDC value is to not provide CAN_CTRLMODE_FD. All the new parameters are defined as u32. This arbitrary choice is done to mimic the other bittiming values with are also all of type u32. An u16 would have been sufficient to hold the TDC values. This patch completes below series (c.f. [1]): - commit 289ea9e4ae59 ("can: add new CAN FD bittiming parameters: Transmitter Delay Compensation (TDC)") - commit c25cc7993243 ("can: bittiming: add calculation for CAN FD Transmitter Delay Compensation (TDC)") [1] https://lore.kernel.org/linux-can/20210224002008.4158-1-mailhol.vincent@wanadoo.fr/T/#t Link: https://lore.kernel.org/all/20210918095637.20108-5-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 004cd09a7d49..75b85c60efb2 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -136,10 +136,35 @@ enum { IFLA_CAN_BITRATE_CONST, IFLA_CAN_DATA_BITRATE_CONST, IFLA_CAN_BITRATE_MAX, - __IFLA_CAN_MAX + IFLA_CAN_TDC, + + /* add new constants above here */ + __IFLA_CAN_MAX, + IFLA_CAN_MAX = __IFLA_CAN_MAX - 1 }; -#define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1) +/* + * CAN FD Transmitter Delay Compensation (TDC) + * + * Please refer to struct can_tdc_const and can_tdc in + * include/linux/can/bittiming.h for further details. + */ +enum { + IFLA_CAN_TDC_UNSPEC, + IFLA_CAN_TDC_TDCV_MIN, /* u32 */ + IFLA_CAN_TDC_TDCV_MAX, /* u32 */ + IFLA_CAN_TDC_TDCO_MIN, /* u32 */ + IFLA_CAN_TDC_TDCO_MAX, /* u32 */ + IFLA_CAN_TDC_TDCF_MIN, /* u32 */ + IFLA_CAN_TDC_TDCF_MAX, /* u32 */ + IFLA_CAN_TDC_TDCV, /* u32 */ + IFLA_CAN_TDC_TDCO, /* u32 */ + IFLA_CAN_TDC_TDCF, /* u32 */ + + /* add new constants above here */ + __IFLA_CAN_TDC, + IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1 +}; /* u16 termination range: 1..65535 Ohms */ #define CAN_TERMINATION_DISABLED 0 -- cgit v1.2.3 From a9d496d8e08ca1eb43d14cb734839b24ab0e8083 Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Mon, 20 Sep 2021 11:37:34 +0100 Subject: KVM: x86: Clarify the kvm_run.emulation_failure structure layout Until more flags for kvm_run.emulation_failure flags are defined, it is undetermined whether new payload elements corresponding to those flags will be additive or alternative. As a hint to userspace that an alternative is possible, wrap the current payload elements in a union. Suggested-by: Sean Christopherson Signed-off-by: David Edmondson Reviewed-by: Sean Christopherson Message-Id: <20210920103737.2696756-2-david.edmondson@oracle.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5ca5ffe16cb4..2c8aa8d4dac1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -403,8 +403,12 @@ struct kvm_run { __u32 suberror; __u32 ndata; __u64 flags; - __u8 insn_size; - __u8 insn_bytes[15]; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; } emulation_failure; /* KVM_EXIT_OSI */ struct { -- cgit v1.2.3 From e615e355894e619785af81479ad6f5a05a8a2e3f Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Mon, 20 Sep 2021 11:37:36 +0100 Subject: KVM: x86: On emulation failure, convey the exit reason, etc. to userspace Should instruction emulation fail, include the VM exit reason, etc. in the emulation_failure data passed to userspace, in order that the VMM can report it as a debugging aid when describing the failure. Suggested-by: Joao Martins Signed-off-by: David Edmondson Reviewed-by: Sean Christopherson Message-Id: <20210920103737.2696756-4-david.edmondson@oracle.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2c8aa8d4dac1..78f0719cc2a3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -398,6 +398,11 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; @@ -409,6 +414,7 @@ struct kvm_run { __u8 insn_bytes[15]; }; }; + /* Arbitrary debug data may follow. */ } emulation_failure; /* KVM_EXIT_OSI */ struct { -- cgit v1.2.3 From 1cf4e9a6fbdbc9850216a2a6d8ed52888679a077 Mon Sep 17 00:00:00 2001 From: Luo Jie Date: Sun, 24 Oct 2021 16:27:33 +0800 Subject: net: phy: add constants for fast retrain related register Add the constants for 2.5G fast retrain capability in 10G AN control register, fast retrain status and control register and THP bypass register into mdio.h. Signed-off-by: Luo Jie Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index bdf77dffa5a4..c54e6eae5366 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -53,12 +53,14 @@ #define MDIO_AN_EEE_LPABLE 61 /* EEE link partner ability */ #define MDIO_AN_EEE_ADV2 62 /* EEE advertisement 2 */ #define MDIO_AN_EEE_LPABLE2 63 /* EEE link partner ability 2 */ +#define MDIO_AN_CTRL2 64 /* AN THP bypass request control */ /* Media-dependent registers. */ #define MDIO_PMA_10GBT_SWAPPOL 130 /* 10GBASE-T pair swap & polarity */ #define MDIO_PMA_10GBT_TXPWR 131 /* 10GBASE-T TX power control */ #define MDIO_PMA_10GBT_SNR 133 /* 10GBASE-T SNR margin, lane A. * Lanes B-D are numbered 134-136. */ +#define MDIO_PMA_10GBR_FSRT_CSR 147 /* 10GBASE-R fast retrain status and control */ #define MDIO_PMA_10GBR_FECABLE 170 /* 10GBASE-R FEC ability */ #define MDIO_PCS_10GBX_STAT1 24 /* 10GBASE-X PCS status 1 */ #define MDIO_PCS_10GBRT_STAT1 32 /* 10GBASE-R/-T PCS status 1 */ @@ -239,6 +241,9 @@ #define MDIO_PMA_10GBR_FECABLE_ABLE 0x0001 /* FEC ability */ #define MDIO_PMA_10GBR_FECABLE_ERRABLE 0x0002 /* FEC error indic. ability */ +/* PMA 10GBASE-R Fast Retrain status and control register. */ +#define MDIO_PMA_10GBR_FSRT_ENABLE 0x0001 /* Fast retrain enable */ + /* PCS 10GBASE-R/-T status register 1. */ #define MDIO_PCS_10GBRT_STAT1_BLKLK 0x0001 /* Block lock attained */ @@ -247,6 +252,7 @@ #define MDIO_PCS_10GBRT_STAT2_BER 0x3f00 /* AN 10GBASE-T control register. */ +#define MDIO_AN_10GBT_CTRL_ADVFSRT2_5G 0x0020 /* Advertise 2.5GBASE-T fast retrain */ #define MDIO_AN_10GBT_CTRL_ADV2_5G 0x0080 /* Advertise 2.5GBASE-T */ #define MDIO_AN_10GBT_CTRL_ADV5G 0x0100 /* Advertise 5GBASE-T */ #define MDIO_AN_10GBT_CTRL_ADV10G 0x1000 /* Advertise 10GBASE-T */ @@ -289,6 +295,9 @@ #define MDIO_EEE_2_5GT 0x0001 /* 2.5GT EEE cap */ #define MDIO_EEE_5GT 0x0002 /* 5GT EEE cap */ +/* AN MultiGBASE-T AN control 2 */ +#define MDIO_AN_THP_BP2_5GT 0x0008 /* 2.5GT THP bypass request */ + /* 2.5G/5G Extended abilities register. */ #define MDIO_PMA_NG_EXTABLE_2_5GBT 0x0001 /* 2.5GBASET ability */ #define MDIO_PMA_NG_EXTABLE_5GBT 0x0002 /* 5GBASET ability */ -- cgit v1.2.3 From 99ce45d5e7dbde399997a630f45ac9f654fa4bcc Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 26 Oct 2021 09:57:28 +0800 Subject: mctp: Implement extended addressing This change allows an extended address struct - struct sockaddr_mctp_ext - to be passed to sendmsg/recvmsg. This allows userspace to specify output ifindex and physical address information (for sendmsg) or receive the input ifindex/physaddr for incoming messages (for recvmsg). This is typically used by userspace for MCTP address discovery and assignment operations. The extended addressing facility is conditional on a new sockopt: MCTP_OPT_ADDR_EXT; userspace must explicitly enable addressing before the kernel will consume/populate the extended address data. Includes a fix for an uninitialised var: Reported-by: kernel test robot Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 6acd4ccafbf7..07b0318716fc 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -11,6 +11,7 @@ #include #include +#include typedef __u8 mctp_eid_t; @@ -28,6 +29,14 @@ struct sockaddr_mctp { __u8 __smctp_pad1; }; +struct sockaddr_mctp_ext { + struct sockaddr_mctp smctp_base; + int smctp_ifindex; + __u8 smctp_halen; + __u8 __smctp_pad0[3]; + __u8 smctp_haddr[MAX_ADDR_LEN]; +}; + #define MCTP_NET_ANY 0x0 #define MCTP_ADDR_NULL 0x00 @@ -36,4 +45,6 @@ struct sockaddr_mctp { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_OPT_ADDR_EXT 1 + #endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 8d11a4f43ef4679be0908026907a7613b33d7127 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 25 Oct 2021 16:27:33 -0300 Subject: fanotify: Reserve UAPI bits for FAN_FS_ERROR FAN_FS_ERROR allows reporting of event type FS_ERROR to userspace, which is a mechanism to report file system wide problems via fanotify. This commit preallocate userspace visible bits to match the FS_ERROR event. Link: https://lore.kernel.org/r/20211025192746.66445-19-krisman@collabora.com Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 64553df9d735..2990731ddc8b 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -20,6 +20,7 @@ #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ -- cgit v1.2.3 From 130a3c742107acff985541c28360c8b40203559c Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 25 Oct 2021 16:27:42 -0300 Subject: fanotify: Emit generic error info for error event The error info is a record sent to users on FAN_FS_ERROR events documenting the type of error. It also carries an error count, documenting how many errors were observed since the last reporting. Link: https://lore.kernel.org/r/20211025192746.66445-28-krisman@collabora.com Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 2990731ddc8b..bd1932c2074d 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -126,6 +126,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_DFID_NAME 2 #define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -160,6 +161,12 @@ struct fanotify_event_info_pidfd { __s32 pidfd; }; +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + struct fanotify_response { __s32 fd; __u32 response; -- cgit v1.2.3 From 2cc1ae4878282c75a569e8ec677d569601c99dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Sat, 4 Sep 2021 11:59:28 +0200 Subject: dm: introduce audit event module for device mapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to send auditing events to user space, we introduce a generic dm-audit module. It provides helper functions to emit audit events through the kernel audit subsystem. We claim the AUDIT_DM_CTRL type=1336 and AUDIT_DM_EVENT type=1337 out of the audit event messages range in the corresponding userspace api in 'include/uapi/linux/audit.h' for those events. AUDIT_DM_CTRL is used to provide information about creation and destruction of device mapper targets which are triggered by user space admin control actions. AUDIT_DM_EVENT is used to provide information about actual errors during operation of the mapped device, showing e.g. integrity violations in audit log. Following commits to device mapper targets actually will make use of this to emit those events in relevant cases. The audit logs look like this if executing the following simple test: # dd if=/dev/zero of=test.img bs=1M count=1024 # losetup -f test.img # integritysetup -vD format --integrity sha256 -t 32 /dev/loop0 # integritysetup open -D /dev/loop0 --integrity sha256 integritytest # integritysetup status integritytest # integritysetup close integritytest # integritysetup open -D /dev/loop0 --integrity sha256 integritytest # integritysetup status integritytest # dd if=/dev/urandom of=/dev/loop0 bs=512 count=1 seek=100000 # dd if=/dev/mapper/integritytest of=/dev/null ------------------------- audit.log from auditd type=UNKNOWN[1336] msg=audit(1630425039.363:184): module=integrity op=ctr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425039.471:185): module=integrity op=dtr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425039.611:186): module=integrity op=ctr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425054.475:187): module=integrity op=dtr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425073.171:191): module=integrity op=ctr ppid=3807 pid=3883 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425087.239:192): module=integrity op=dtr ppid=3807 pid=3902 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425093.755:193): module=integrity op=ctr ppid=3807 pid=3906 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1337] msg=audit(1630425112.119:194): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:195): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:196): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:197): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:198): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:199): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:200): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:201): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:202): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:203): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 Signed-off-by: Michael Weiß Signed-off-by: Paul Moore # fix audit.h numbering Signed-off-by: Mike Snitzer --- include/uapi/linux/audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index daa481729e9b..809e4c2041b3 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -118,6 +118,8 @@ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ +#define AUDIT_DM_CTRL 1338 /* Device Mapper target control */ +#define AUDIT_DM_EVENT 1339 /* Device Mapper events */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.3 From a390ccb316beb8ea594b8695d53926710ca454a3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Oct 2021 16:26:07 +0300 Subject: fuse: add FOPEN_NOFLUSH Add flag returned by FUSE_OPEN and FUSE_CREATE requests to avoid flushing data cache on close. Different filesystems implement ->flush() is different ways: - Most disk filesystems do not implement ->flush() at all - Some network filesystem (e.g. nfs) flush local write cache of FMODE_WRITE file and send a "flush" command to server - Some network filesystem (e.g. cifs) flush local write cache of FMODE_WRITE file without sending an additional command to server FUSE flushes local write cache of ANY file, even non FMODE_WRITE and sends a "flush" command to server (if server implements it). The FUSE implementation of ->flush() seems over agressive and arbitrary and does not make a lot of sense when writeback caching is disabled. Instead of deciding on another arbitrary implementation that makes sense, leave the choice of per-file flush behavior in the hands of the server. Link: https://lore.kernel.org/linux-fsdevel/CAJfpegspE8e6aKd47uZtSYX8Y-1e1FWS0VL0DH2Skb9gQP5RJQ@mail.gmail.com/ Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 36ed092227fa..a1dc3ee1d17c 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -184,6 +184,9 @@ * * 7.34 * - add FUSE_SYNCFS + * + * 7.35 + * - add FOPEN_NOFLUSH */ #ifndef _LINUX_FUSE_H @@ -219,7 +222,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 34 +#define FUSE_KERNEL_MINOR_VERSION 35 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -290,12 +293,14 @@ struct fuse_file_lock { * FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) +#define FOPEN_NOFLUSH (1 << 5) /** * INIT request/reply flags -- cgit v1.2.3 From 9330986c03006ab1d33d243b7cfe598a7a3c1baa Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 27 Oct 2021 16:45:00 -0700 Subject: bpf: Add bloom filter map implementation This patch adds the kernel-side changes for the implementation of a bpf bloom filter map. The bloom filter map supports peek (determining whether an element is present in the map) and push (adding an element to the map) operations.These operations are exposed to userspace applications through the already existing syscalls in the following way: BPF_MAP_LOOKUP_ELEM -> peek BPF_MAP_UPDATE_ELEM -> push The bloom filter map does not have keys, only values. In light of this, the bloom filter map's API matches that of queue stack maps: user applications use BPF_MAP_LOOKUP_ELEM/BPF_MAP_UPDATE_ELEM which correspond internally to bpf_map_peek_elem/bpf_map_push_elem, and bpf programs must use the bpf_map_peek_elem and bpf_map_push_elem APIs to query or add an element to the bloom filter map. When the bloom filter map is created, it must be created with a key_size of 0. For updates, the user will pass in the element to add to the map as the value, with a NULL key. For lookups, the user will pass in the element to query in the map as the value, with a NULL key. In the verifier layer, this requires us to modify the argument type of a bloom filter's BPF_FUNC_map_peek_elem call to ARG_PTR_TO_MAP_VALUE; as well, in the syscall layer, we need to copy over the user value so that in bpf_map_peek_elem, we know which specific value to query. A few things to please take note of: * If there are any concurrent lookups + updates, the user is responsible for synchronizing this to ensure no false negative lookups occur. * The number of hashes to use for the bloom filter is configurable from userspace. If no number is specified, the default used will be 5 hash functions. The benchmarks later in this patchset can help compare the performance of using different number of hashes on different entry sizes. In general, using more hashes decreases both the false positive rate and the speed of a lookup. * Deleting an element in the bloom filter map is not supported. * The bloom filter map may be used as an inner map. * The "max_entries" size that is specified at map creation time is used to approximate a reasonable bitmap size for the bloom filter, and is not otherwise strictly enforced. If the user wishes to insert more entries into the bloom filter than "max_entries", they may do so but they should be aware that this may lead to a higher false positive rate. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211027234504.30744-2-joannekoong@fb.com --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c10820037883..8bead4aa3ad0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -5638,6 +5646,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { -- cgit v1.2.3 From d6aef08a872b9e23eecc92d0e92393473b13c497 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 28 Oct 2021 12:04:54 +0530 Subject: bpf: Add bpf_kallsyms_lookup_name helper This helper allows us to get the address of a kernel symbol from inside a BPF_PROG_TYPE_SYSCALL prog (used by gen_loader), so that we can relocate typeless ksym vars. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211028063501.2239335-2-memxor@gmail.com --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8bead4aa3ad0..bd0c9f0487f6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4923,6 +4923,21 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *unix_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5104,6 +5119,7 @@ union bpf_attr { FN(get_branch_snapshot), \ FN(trace_vprintk), \ FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From e77fbf990316d47968a793d8aa920fd62385aec9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 22 Oct 2021 16:53:36 +0200 Subject: btrfs: send: prepare for v2 protocol This is preparatory work for send protocol update to version 2 and higher. We have many pending protocol update requests but still don't have the basic protocol rev in place, the first thing that must happen is to do the actual versioning support. The protocol version is u32 and is a new member in the send ioctl struct. Validity of the version field is backed by a new flag bit. Old kernels would fail when a higher version is requested. Version protocol 0 will pick the highest supported version, BTRFS_SEND_STREAM_VERSION, that's also exported in sysfs. The version is still unchanged and will be increased once we have new incompatible commands or stream updates. Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d7d3cfead056..738619994e26 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args { */ #define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4 +/* + * Read the protocol version in the structure + */ +#define BTRFS_SEND_FLAG_VERSION 0x8 + #define BTRFS_SEND_FLAG_MASK \ (BTRFS_SEND_FLAG_NO_FILE_DATA | \ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \ - BTRFS_SEND_FLAG_OMIT_END_CMD) + BTRFS_SEND_FLAG_OMIT_END_CMD | \ + BTRFS_SEND_FLAG_VERSION) struct btrfs_ioctl_send_args { __s64 send_fd; /* in */ @@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args { __u64 __user *clone_sources; /* in */ __u64 parent_root; /* in */ __u64 flags; /* in */ - __u64 reserved[4]; /* in */ + __u32 version; /* in */ + __u8 reserved[28]; /* in */ }; /* -- cgit v1.2.3 From cf2197ca4b8c199d188593ca6800ea1827c42171 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 29 Oct 2021 14:09:29 +0800 Subject: bcache: move uapi header bcache.h to bcache code directory The header file include/uapi/linux/bcache.h is not really a user space API heaer. This file defines the ondisk format of bcache internal meta data but no one includes it from user space, bcache-tools has its own copy of this header with minor modification. Therefore, this patch moves include/uapi/linux/bcache.h to bcache code directory as drivers/md/bcache/bcache_ondisk.h. Suggested-by: Arnd Bergmann Suggested-by: Christoph Hellwig Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211029060930.119923-2-colyli@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/bcache.h | 445 -------------------------------------------- 1 file changed, 445 deletions(-) delete mode 100644 include/uapi/linux/bcache.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h deleted file mode 100644 index 97413586195b..000000000000 --- a/include/uapi/linux/bcache.h +++ /dev/null @@ -1,445 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_BCACHE_H -#define _LINUX_BCACHE_H - -/* - * Bcache on disk data structures - */ - -#include - -#define BITMASK(name, type, field, offset, size) \ -static inline __u64 name(const type *k) \ -{ return (k->field >> offset) & ~(~0ULL << size); } \ - \ -static inline void SET_##name(type *k, __u64 v) \ -{ \ - k->field &= ~(~(~0ULL << size) << offset); \ - k->field |= (v & ~(~0ULL << size)) << offset; \ -} - -/* Btree keys - all units are in sectors */ - -struct bkey { - __u64 high; - __u64 low; - __u64 ptr[]; -}; - -#define KEY_FIELD(name, field, offset, size) \ - BITMASK(name, struct bkey, field, offset, size) - -#define PTR_FIELD(name, offset, size) \ -static inline __u64 name(const struct bkey *k, unsigned int i) \ -{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \ - \ -static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ -{ \ - k->ptr[i] &= ~(~(~0ULL << size) << offset); \ - k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ -} - -#define KEY_SIZE_BITS 16 -#define KEY_MAX_U64S 8 - -KEY_FIELD(KEY_PTRS, high, 60, 3) -KEY_FIELD(__PAD0, high, 58, 2) -KEY_FIELD(KEY_CSUM, high, 56, 2) -KEY_FIELD(__PAD1, high, 55, 1) -KEY_FIELD(KEY_DIRTY, high, 36, 1) - -KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) -KEY_FIELD(KEY_INODE, high, 0, 20) - -/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ - -static inline __u64 KEY_OFFSET(const struct bkey *k) -{ - return k->low; -} - -static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) -{ - k->low = v; -} - -/* - * The high bit being set is a relic from when we used it to do binary - * searches - it told you where a key started. It's not used anymore, - * and can probably be safely dropped. - */ -#define KEY(inode, offset, size) \ -((struct bkey) { \ - .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ - .low = (offset) \ -}) - -#define ZERO_KEY KEY(0, 0, 0) - -#define MAX_KEY_INODE (~(~0 << 20)) -#define MAX_KEY_OFFSET (~0ULL >> 1) -#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) - -#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) -#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) - -#define PTR_DEV_BITS 12 - -PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) -PTR_FIELD(PTR_OFFSET, 8, 43) -PTR_FIELD(PTR_GEN, 0, 8) - -#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) - -#define MAKE_PTR(gen, offset, dev) \ - ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) - -/* Bkey utility code */ - -static inline unsigned long bkey_u64s(const struct bkey *k) -{ - return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); -} - -static inline unsigned long bkey_bytes(const struct bkey *k) -{ - return bkey_u64s(k) * sizeof(__u64); -} - -#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) - -static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) -{ - SET_KEY_INODE(dest, KEY_INODE(src)); - SET_KEY_OFFSET(dest, KEY_OFFSET(src)); -} - -static inline struct bkey *bkey_next(const struct bkey *k) -{ - __u64 *d = (void *) k; - - return (struct bkey *) (d + bkey_u64s(k)); -} - -static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) -{ - __u64 *d = (void *) k; - - return (struct bkey *) (d + nr_keys); -} -/* Enough for a key with 6 pointers */ -#define BKEY_PAD 8 - -#define BKEY_PADDED(key) \ - union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } - -/* Superblock */ - -/* Version 0: Cache device - * Version 1: Backing device - * Version 2: Seed pointer into btree node checksum - * Version 3: Cache device with new UUID format - * Version 4: Backing device with data offset - */ -#define BCACHE_SB_VERSION_CDEV 0 -#define BCACHE_SB_VERSION_BDEV 1 -#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 -#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 -#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 -#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 -#define BCACHE_SB_MAX_VERSION 6 - -#define SB_SECTOR 8 -#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT) -#define SB_SIZE 4096 -#define SB_LABEL_SIZE 32 -#define SB_JOURNAL_BUCKETS 256U -/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ -#define MAX_CACHES_PER_SET 8 - -#define BDEV_DATA_START_DEFAULT 16 /* sectors */ - -struct cache_sb_disk { - __le64 csum; - __le64 offset; /* sector where this sb was written */ - __le64 version; - - __u8 magic[16]; - - __u8 uuid[16]; - union { - __u8 set_uuid[16]; - __le64 set_magic; - }; - __u8 label[SB_LABEL_SIZE]; - - __le64 flags; - __le64 seq; - - __le64 feature_compat; - __le64 feature_incompat; - __le64 feature_ro_compat; - - __le64 pad[5]; - - union { - struct { - /* Cache devices */ - __le64 nbuckets; /* device size */ - - __le16 block_size; /* sectors */ - __le16 bucket_size; /* sectors */ - - __le16 nr_in_set; - __le16 nr_this_dev; - }; - struct { - /* Backing devices */ - __le64 data_offset; - - /* - * block_size from the cache device section is still used by - * backing devices, so don't add anything here until we fix - * things to not need it for backing devices anymore - */ - }; - }; - - __le32 last_mount; /* time overflow in y2106 */ - - __le16 first_bucket; - union { - __le16 njournal_buckets; - __le16 keys; - }; - __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ - __le16 obso_bucket_size_hi; /* obsoleted */ -}; - -/* - * This is for in-memory bcache super block. - * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member - * size, ordering and even whole struct size may be different - * from cache_sb_disk. - */ -struct cache_sb { - __u64 offset; /* sector where this sb was written */ - __u64 version; - - __u8 magic[16]; - - __u8 uuid[16]; - union { - __u8 set_uuid[16]; - __u64 set_magic; - }; - __u8 label[SB_LABEL_SIZE]; - - __u64 flags; - __u64 seq; - - __u64 feature_compat; - __u64 feature_incompat; - __u64 feature_ro_compat; - - union { - struct { - /* Cache devices */ - __u64 nbuckets; /* device size */ - - __u16 block_size; /* sectors */ - __u16 nr_in_set; - __u16 nr_this_dev; - __u32 bucket_size; /* sectors */ - }; - struct { - /* Backing devices */ - __u64 data_offset; - - /* - * block_size from the cache device section is still used by - * backing devices, so don't add anything here until we fix - * things to not need it for backing devices anymore - */ - }; - }; - - __u32 last_mount; /* time overflow in y2106 */ - - __u16 first_bucket; - union { - __u16 njournal_buckets; - __u16 keys; - }; - __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ -}; - -static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) -{ - return sb->version == BCACHE_SB_VERSION_BDEV - || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET - || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; -} - -BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); -BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); -BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); -#define CACHE_REPLACEMENT_LRU 0U -#define CACHE_REPLACEMENT_FIFO 1U -#define CACHE_REPLACEMENT_RANDOM 2U - -BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); -#define CACHE_MODE_WRITETHROUGH 0U -#define CACHE_MODE_WRITEBACK 1U -#define CACHE_MODE_WRITEAROUND 2U -#define CACHE_MODE_NONE 3U -BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); -#define BDEV_STATE_NONE 0U -#define BDEV_STATE_CLEAN 1U -#define BDEV_STATE_DIRTY 2U -#define BDEV_STATE_STALE 3U - -/* - * Magic numbers - * - * The various other data structures have their own magic numbers, which are - * xored with the first part of the cache set's UUID - */ - -#define JSET_MAGIC 0x245235c1a3625032ULL -#define PSET_MAGIC 0x6750e15f87337f91ULL -#define BSET_MAGIC 0x90135c78b99e07f5ULL - -static inline __u64 jset_magic(struct cache_sb *sb) -{ - return sb->set_magic ^ JSET_MAGIC; -} - -static inline __u64 pset_magic(struct cache_sb *sb) -{ - return sb->set_magic ^ PSET_MAGIC; -} - -static inline __u64 bset_magic(struct cache_sb *sb) -{ - return sb->set_magic ^ BSET_MAGIC; -} - -/* - * Journal - * - * On disk format for a journal entry: - * seq is monotonically increasing; every journal entry has its own unique - * sequence number. - * - * last_seq is the oldest journal entry that still has keys the btree hasn't - * flushed to disk yet. - * - * version is for on disk format changes. - */ - -#define BCACHE_JSET_VERSION_UUIDv1 1 -#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ -#define BCACHE_JSET_VERSION 1 - -struct jset { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 keys; - - __u64 last_seq; - - BKEY_PADDED(uuid_bucket); - BKEY_PADDED(btree_root); - __u16 btree_level; - __u16 pad[3]; - - __u64 prio_bucket[MAX_CACHES_PER_SET]; - - union { - struct bkey start[0]; - __u64 d[0]; - }; -}; - -/* Bucket prios/gens */ - -struct prio_set { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 pad; - - __u64 next_bucket; - - struct bucket_disk { - __u16 prio; - __u8 gen; - } __attribute((packed)) data[]; -}; - -/* UUIDS - per backing device/flash only volume metadata */ - -struct uuid_entry { - union { - struct { - __u8 uuid[16]; - __u8 label[32]; - __u32 first_reg; /* time overflow in y2106 */ - __u32 last_reg; - __u32 invalidated; - - __u32 flags; - /* Size of flash only volumes */ - __u64 sectors; - }; - - __u8 pad[128]; - }; -}; - -BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); - -/* Btree nodes */ - -/* Version 1: Seed pointer into btree node checksum - */ -#define BCACHE_BSET_CSUM 1 -#define BCACHE_BSET_VERSION 1 - -/* - * Btree nodes - * - * On disk a btree node is a list/log of these; within each set the keys are - * sorted - */ -struct bset { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 keys; - - union { - struct bkey start[0]; - __u64 d[0]; - }; -}; - -/* OBSOLETE */ - -/* UUIDS - per backing device/flash only volume metadata */ - -struct uuid_entry_v0 { - __u8 uuid[16]; - __u8 label[32]; - __u32 first_reg; - __u32 last_reg; - __u32 invalidated; - __u32 pad; -}; - -#endif /* _LINUX_BCACHE_H */ -- cgit v1.2.3 From 56fa95014a0447f798444e626091cbeb3176af24 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 Oct 2021 15:43:29 +0200 Subject: netfilter: nft_meta: add NFT_META_IFTYPE Generalize NFT_META_IIFTYPE to NFT_META_IFTYPE which allows you to match on the interface type of the skb->dev field. This field is used by the netdev family to add an implicit dependency to skip non-ethernet packets when matching on layer 3 and 4 TCP/IP header fields. For backward compatibility, add the NFT_META_IIFTYPE alias to NFT_META_IFTYPE. Add __NFT_META_IIFTYPE, to be used by userspace in the future to match specifically on the iiftype. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e94d1fa554cb..08db4ee06ab6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -896,7 +896,8 @@ enum nft_meta_keys { NFT_META_OIF, NFT_META_IIFNAME, NFT_META_OIFNAME, - NFT_META_IIFTYPE, + NFT_META_IFTYPE, +#define NFT_META_IIFTYPE NFT_META_IFTYPE NFT_META_OIFTYPE, NFT_META_SKUID, NFT_META_SKGID, @@ -923,6 +924,7 @@ enum nft_meta_keys { NFT_META_TIME_HOUR, NFT_META_SDIF, NFT_META_SDIFNAME, + __NFT_META_IIFTYPE, }; /** -- cgit v1.2.3 From e47be840e87ea15677bca2043ee7b696ccacf56a Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 29 Oct 2021 17:14:48 +0800 Subject: vdpa: add new attribute VDPA_ATTR_DEV_MIN_VQ_SIZE This attribute advertises the min value of virtqueue size. The value is 1 by default. Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/2bbc417355c4d22298050b1ba887cecfbde3e85d.1635493219.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vdpa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 66a41e4ec163..e3b87879514c 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -32,6 +32,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ID, /* u32 */ VDPA_ATTR_DEV_MAX_VQS, /* u32 */ VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ + VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, -- cgit v1.2.3 From c46b38dc8743535e686b911d253a844f0bd50ead Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 28 Oct 2021 22:15:00 +0200 Subject: netfilter: nft_payload: support for inner header matching / mangling Allow to match and mangle on inner headers / payload data after the transport header. There is a new field in the pktinfo structure that stores the inner header offset which is calculated only when requested. Only TCP and UDP supported at this stage. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 08db4ee06ab6..466fd3f4447c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -753,11 +753,13 @@ enum nft_dynset_attributes { * @NFT_PAYLOAD_LL_HEADER: link layer header * @NFT_PAYLOAD_NETWORK_HEADER: network header * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + * @NFT_PAYLOAD_INNER_HEADER: inner header / payload */ enum nft_payload_bases { NFT_PAYLOAD_LL_HEADER, NFT_PAYLOAD_NETWORK_HEADER, NFT_PAYLOAD_TRANSPORT_HEADER, + NFT_PAYLOAD_INNER_HEADER, }; /** -- cgit v1.2.3 From dcce162559ee1ce5f64992c4c65197f9270e3d4f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 Oct 2021 15:17:49 +0530 Subject: i2c: virtio: Add support for zero-length requests The virtio specification received a new mandatory feature (VIRTIO_I2C_F_ZERO_LENGTH_REQUEST) for zero length requests. Fail if the feature isn't offered by the device. For each read-request, set the VIRTIO_I2C_FLAGS_M_RD flag, as required by the VIRTIO_I2C_F_ZERO_LENGTH_REQUEST feature. This allows us to support zero length requests, like SMBUS Quick, where the buffer need not be sent anymore. Signed-off-by: Viresh Kumar Link: https://lore.kernel.org/r/7c58868cd26d2fc4bd82d0d8b0dfb55636380110.1634808714.git.viresh.kumar@linaro.org Signed-off-by: Michael S. Tsirkin Acked-by: Jie Deng # once the spec is merged --- include/uapi/linux/virtio_i2c.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h index 7c6a6fc01ad6..acf3b6069136 100644 --- a/include/uapi/linux/virtio_i2c.h +++ b/include/uapi/linux/virtio_i2c.h @@ -11,9 +11,15 @@ #include #include +/* Virtio I2C Feature bits */ +#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 + /* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ #define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) +/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ +#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) + /** * struct virtio_i2c_out_hdr - the virtio I2C message OUT header * @addr: the controlled device address -- cgit v1.2.3 From ad69dd0bf26b88ec6ab26f8bbe5cd74fbed7672a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:13 +0300 Subject: vdpa: Introduce query of device config layout Introduce a command to query a device config layout. An example query of network vdpa device: $ vdpa dev add name bar mgmtdev vdpasim_net $ vdpa dev config show bar: mac 00:35:09:19:48:05 link up link_announce false mtu 1500 $ vdpa dev config show -jp { "config": { "bar": { "mac": "00:35:09:19:48:05", "link ": "up", "link_announce ": false, "mtu": 1500, } } } Signed-off-by: Parav Pandit Signed-off-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211026175519.87795-3-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vdpa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index e3b87879514c..a252f06f9dfd 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -17,6 +17,7 @@ enum vdpa_command { VDPA_CMD_DEV_NEW, VDPA_CMD_DEV_DEL, VDPA_CMD_DEV_GET, /* can dump */ + VDPA_CMD_DEV_CONFIG_GET, /* can dump */ }; enum vdpa_attr { @@ -34,6 +35,11 @@ enum vdpa_attr { VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */ + VDPA_ATTR_DEV_NET_STATUS, /* u8 */ + VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From b9022b53adad88fd6cf2b9718c9e498504f3e1dd Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 31 Oct 2021 16:00:02 +0000 Subject: amt: add control plane of amt interface It adds definitions and control plane code for AMT. this is very similar to udp tunneling interfaces such as gtp, vxlan, etc. In the next patch, data plane code will be added. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/uapi/linux/amt.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/uapi/linux/amt.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/amt.h b/include/uapi/linux/amt.h new file mode 100644 index 000000000000..2dccff417195 --- /dev/null +++ b/include/uapi/linux/amt.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (c) 2021 Taehee Yoo + */ +#ifndef _UAPI_AMT_H_ +#define _UAPI_AMT_H_ + +enum ifla_amt_mode { + /* AMT interface works as Gateway mode. + * The Gateway mode encapsulates IGMP/MLD traffic and decapsulates + * multicast traffic. + */ + AMT_MODE_GATEWAY = 0, + /* AMT interface works as Relay mode. + * The Relay mode encapsulates multicast traffic and decapsulates + * IGMP/MLD traffic. + */ + AMT_MODE_RELAY, + __AMT_MODE_MAX, +}; + +#define AMT_MODE_MAX (__AMT_MODE_MAX - 1) + +enum { + IFLA_AMT_UNSPEC, + /* This attribute specify mode etier Gateway or Relay. */ + IFLA_AMT_MODE, + /* This attribute specify Relay port. + * AMT interface is created as Gateway mode, this attribute is used + * to specify relay(remote) port. + * AMT interface is created as Relay mode, this attribute is used + * as local port. + */ + IFLA_AMT_RELAY_PORT, + /* This attribute specify Gateway port. + * AMT interface is created as Gateway mode, this attribute is used + * as local port. + * AMT interface is created as Relay mode, this attribute is not used. + */ + IFLA_AMT_GATEWAY_PORT, + /* This attribute specify physical device */ + IFLA_AMT_LINK, + /* This attribute specify local ip address */ + IFLA_AMT_LOCAL_IP, + /* This attribute specify Relay ip address. + * So, this is not used by Relay. + */ + IFLA_AMT_REMOTE_IP, + /* This attribute specify Discovery ip address. + * When Gateway get started, it send discovery message to find the + * Relay's ip address. + * So, this is not used by Relay. + */ + IFLA_AMT_DISCOVERY_IP, + /* This attribute specify number of maximum tunnel. */ + IFLA_AMT_MAX_TUNNELS, + __IFLA_AMT_MAX, +}; + +#define IFLA_AMT_MAX (__IFLA_AMT_MAX - 1) + +#endif /* _UAPI_AMT_H_ */ -- cgit v1.2.3 From 8845b4681bf44b9d2d2badf2c67cf476e42a86bd Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 29 Oct 2021 15:49:08 -0700 Subject: bpf: Add alignment padding for "map_extra" + consolidate holes This patch makes 2 changes regarding alignment padding for the "map_extra" field. 1) In the kernel header, "map_extra" and "btf_value_type_id" are rearranged to consolidate the hole. Before: struct bpf_map { ... u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ u64 map_extra; /* 48 8 */ int spin_lock_off; /* 56 4 */ int timer_off; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 id; /* 64 4 */ int numa_node; /* 68 4 */ ... bool frozen; /* 117 1 */ /* XXX 10 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ ... struct work_struct work; /* 144 72 */ /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */ struct mutex freeze_mutex; /* 216 144 */ /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */ u64 writecnt; /* 360 8 */ /* size: 384, cachelines: 6, members: 26 */ /* sum members: 354, holes: 2, sum holes: 14 */ /* padding: 16 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 10 */ } __attribute__((__aligned__(64))); After: struct bpf_map { ... u32 max_entries; /* 36 4 */ u64 map_extra; /* 40 8 */ u32 map_flags; /* 48 4 */ int spin_lock_off; /* 52 4 */ int timer_off; /* 56 4 */ u32 id; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ int numa_node; /* 64 4 */ ... bool frozen /* 113 1 */ /* XXX 14 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ ... struct work_struct work; /* 144 72 */ /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */ struct mutex freeze_mutex; /* 216 144 */ /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */ u64 writecnt; /* 360 8 */ /* size: 384, cachelines: 6, members: 26 */ /* sum members: 354, holes: 1, sum holes: 14 */ /* padding: 16 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 14 */ } __attribute__((__aligned__(64))); 2) Add alignment padding to the bpf_map_info struct More details can be found in commit 36f9814a494a ("bpf: fix uapi hole for 32 bit compat applications") Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211029224909.1721024-3-joannekoong@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bd0c9f0487f6..ba5af15e25f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5662,6 +5662,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 :32; /* alignment pad */ __u64 map_extra; } __attribute__((aligned(8))); -- cgit v1.2.3 From fcdb44d08a95003c3d040aecdee286156ec6f34e Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Mon, 1 Nov 2021 10:36:28 -0700 Subject: net: arp: introduce arp_evict_nocarrier sysctl parameter This change introduces a new sysctl parameter, arp_evict_nocarrier. When set (default) the ARP cache will be cleared on a NOCARRIER event. This new option has been defaulted to '1' which maintains existing behavior. Clearing the ARP cache on NOCARRIER is relatively new, introduced by: commit 859bd2ef1fc1110a8031b967ee656c53a6260a76 Author: David Ahern Date: Thu Oct 11 20:33:49 2018 -0700 net: Evict neighbor entries on carrier down The reason for this changes is to prevent the ARP cache from being cleared when a wireless device roams. Specifically for wireless roams the ARP cache should not be cleared because the underlying network has not changed. Clearing the ARP cache in this case can introduce significant delays sending out packets after a roam. A user reported such a situation here: https://lore.kernel.org/linux-wireless/CACsRnHWa47zpx3D1oDq9JYnZWniS8yBwW1h0WAVZ6vrbwL_S0w@mail.gmail.com/ After some investigation it was found that the kernel was holding onto packets until ARP finished which resulted in this 1 second delay. It was also found that the first ARP who-has was never responded to, which is actually what caues the delay. This change is more or less working around this behavior, but again, there is no reason to clear the cache on a roam anyways. As for the unanswered who-has, we know the packet made it OTA since it was seen while monitoring. Why it never received a response is unknown. In any case, since this is a problem on the AP side of things all that can be done is to work around it until it is solved. Some background on testing/reproducing the packet delay: Hardware: - 2 access points configured for Fast BSS Transition (Though I don't see why regular reassociation wouldn't have the same behavior) - Wireless station running IWD as supplicant - A device on network able to respond to pings (I used one of the APs) Procedure: - Connect to first AP - Ping once to establish an ARP entry - Start a tcpdump - Roam to second AP - Wait for operstate UP event, and note the timestamp - Start pinging Results: Below is the tcpdump after UP. It was recorded the interface went UP at 10:42:01.432875. 10:42:01.461871 ARP, Request who-has 192.168.254.1 tell 192.168.254.71, length 28 10:42:02.497976 ARP, Request who-has 192.168.254.1 tell 192.168.254.71, length 28 10:42:02.507162 ARP, Reply 192.168.254.1 is-at ac:86:74:55:b0:20, length 46 10:42:02.507185 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 1, length 64 10:42:02.507205 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 2, length 64 10:42:02.507212 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 3, length 64 10:42:02.507219 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 4, length 64 10:42:02.507225 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 5, length 64 10:42:02.507232 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 6, length 64 10:42:02.515373 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 1, length 64 10:42:02.521399 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 2, length 64 10:42:02.521612 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 3, length 64 10:42:02.521941 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 4, length 64 10:42:02.522419 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 5, length 64 10:42:02.523085 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 6, length 64 You can see the first ARP who-has went out very quickly after UP, but was never responded to. Nearly a second later the kernel retries and gets a response. Only then do the ping packets go out. If an ARP entry is manually added prior to UP (after the cache is cleared) it is seen that the first ping is never responded to, so its not only an issue with ARP but with data packets in general. As mentioned prior, the wireless interface was also monitored to verify the ping/ARP packet made it OTA which was observed to be true. Signed-off-by: James Prestwood Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/uapi/linux/ip.h | 1 + include/uapi/linux/sysctl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index e42d13b55cf3..e00bbb9c47bb 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -169,6 +169,7 @@ enum IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, IPV4_DEVCONF_DROP_GRATUITOUS_ARP, IPV4_DEVCONF_BC_FORWARDING, + IPV4_DEVCONF_ARP_EVICT_NOCARRIER, __IPV4_DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 1e05d3caa712..6a3b194c50fe 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, + NET_IPV4_CONF_ARP_EVICT_NOCARRIER=23, }; /* /proc/sys/net/ipv4/netfilter */ -- cgit v1.2.3 From 18ac597af25e9760b76471524096f5b29eb820e6 Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Mon, 1 Nov 2021 10:36:29 -0700 Subject: net: ndisc: introduce ndisc_evict_nocarrier sysctl parameter In most situations the neighbor discovery cache should be cleared on a NOCARRIER event which is currently done unconditionally. But for wireless roams the neighbor discovery cache can and should remain intact since the underlying network has not changed. This patch introduces a sysctl option ndisc_evict_nocarrier which can be disabled by a wireless supplicant during a roam. This allows packets to be sent after a roam immediately without having to wait for neighbor discovery. A user reported roughly a 1 second delay after a roam before packets could be sent out (note, on IPv4). This delay was due to the ARP cache being cleared. During testing of this same scenario using IPv6 no delay was noticed, but regardless there is no reason to clear the ndisc cache for wireless roams. Signed-off-by: James Prestwood Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index b243a53fa985..d4178dace0bf 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -193,6 +193,7 @@ enum { DEVCONF_IOAM6_ENABLED, DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, + DEVCONF_NDISC_EVICT_NOCARRIER, DEVCONF_MAX }; -- cgit v1.2.3 From 1aabe578dd86e9f2867c4db4fba9a15f4ba1825d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Nov 2021 15:02:36 -0700 Subject: ethtool: fix ethtool msg len calculation for pause stats ETHTOOL_A_PAUSE_STAT_MAX is the MAX attribute id, so we need to subtract non-stats and add one to get a count (IOW -2+1 == -1). Otherwise we'll see: ethnl cmd 21: calculated reply length 40, but consumed 52 Fixes: 9a27a33027f2 ("ethtool: add standard pause stats") Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index ca5fbb59fa42..999777d32dcf 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -411,7 +411,9 @@ enum { ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here */ + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! + */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; -- cgit v1.2.3 From eff5cdd745a68863a73095b0b4d62d15e0d9d902 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 Oct 2021 16:34:19 +0530 Subject: gpio: virtio: Add IRQ support This patch adds IRQ support for the virtio GPIO driver. Note that this uses the irq_bus_lock/unlock() callbacks, since those operations over virtio may sleep. Reviewed-by: Linus Walleij Signed-off-by: Viresh Kumar Acked-by: Michael S. Tsirkin Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/virtio_gpio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h index 0445f905d8cc..d04af9c5f0de 100644 --- a/include/uapi/linux/virtio_gpio.h +++ b/include/uapi/linux/virtio_gpio.h @@ -5,12 +5,16 @@ #include +/* Virtio GPIO Feature bits */ +#define VIRTIO_GPIO_F_IRQ 0 + /* Virtio GPIO request types */ #define VIRTIO_GPIO_MSG_GET_NAMES 0x0001 #define VIRTIO_GPIO_MSG_GET_DIRECTION 0x0002 #define VIRTIO_GPIO_MSG_SET_DIRECTION 0x0003 #define VIRTIO_GPIO_MSG_GET_VALUE 0x0004 #define VIRTIO_GPIO_MSG_SET_VALUE 0x0005 +#define VIRTIO_GPIO_MSG_IRQ_TYPE 0x0006 /* Possible values of the status field */ #define VIRTIO_GPIO_STATUS_OK 0x0 @@ -21,6 +25,14 @@ #define VIRTIO_GPIO_DIRECTION_OUT 0x01 #define VIRTIO_GPIO_DIRECTION_IN 0x02 +/* Virtio GPIO IRQ types */ +#define VIRTIO_GPIO_IRQ_TYPE_NONE 0x00 +#define VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING 0x01 +#define VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING 0x02 +#define VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH 0x03 +#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH 0x04 +#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW 0x08 + struct virtio_gpio_config { __le16 ngpio; __u8 padding[2]; @@ -44,4 +56,17 @@ struct virtio_gpio_response_get_names { __u8 value[]; }; +/* Virtio GPIO IRQ Request / Response */ +struct virtio_gpio_irq_request { + __le16 gpio; +}; + +struct virtio_gpio_irq_response { + __u8 status; +}; + +/* Possible values of the interrupt status field */ +#define VIRTIO_GPIO_IRQ_STATUS_INVALID 0x0 +#define VIRTIO_GPIO_IRQ_STATUS_VALID 0x1 + #endif /* _LINUX_VIRTIO_GPIO_H */ -- cgit v1.2.3 From 7d0003da6297eb128f3490e396e6fc6df71557cd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 1 Nov 2021 05:11:25 -0400 Subject: virtio_gpio: drop packed attribute Declaring the struct packed here is mostly harmless, but gives a bad example for people to copy. As the struct is packed and aligned manually, let's just drop the attribute. Signed-off-by: Michael S. Tsirkin Acked-by: Viresh Kumar Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/virtio_gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h index d04af9c5f0de..d4b29d9a39dd 100644 --- a/include/uapi/linux/virtio_gpio.h +++ b/include/uapi/linux/virtio_gpio.h @@ -37,7 +37,7 @@ struct virtio_gpio_config { __le16 ngpio; __u8 padding[2]; __le32 gpio_names_size; -} __packed; +}; /* Virtio GPIO Request / Response */ struct virtio_gpio_request { -- cgit v1.2.3 From aedad3e1c6ddec234b63cfb57ac231da0f680e50 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 5 Nov 2021 16:08:29 -0700 Subject: arm64: mte: change PR_MTE_TCF_NONE back into an unsigned long This constant was previously an unsigned long, but was changed into an int in commit 433c38f40f6a ("arm64: mte: change ASYNC and SYNC TCF settings into bitfields"). This ended up causing spurious unsigned-signed comparison warnings in expressions such as: (x & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE Therefore, change it back into an unsigned long to silence these warnings. Link: https://linux-review.googlesource.com/id/I07a72310db30227a5b7d789d0b817d78b657c639 Signed-off-by: Peter Collingbourne Link: https://lore.kernel.org/r/20211105230829.2254790-1-pcc@google.com Signed-off-by: Will Deacon --- include/uapi/linux/prctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43bd7f713c39..de45fcd2dcbe 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -235,7 +235,7 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_NONE 0 +# define PR_MTE_TCF_NONE 0UL # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) # define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) -- cgit v1.2.3 From 61082ad6a6e1f999eef7e7e90046486c87933b1e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 15 Mar 2021 16:57:13 +0100 Subject: virtio-mem: support VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE The initial virtio-mem spec states that while unplugged memory should not be read, the device still has to allow for reading unplugged memory inside the usable region. The primary motivation for this default handling was to simplify bringup of virtio-mem, because there were corner cases where Linux might have accidentially read unplugged memory inside added Linux memory blocks. In the meantime, we: 1. Removed /dev/kmem in commit bbcd53c96071 ("drivers/char: remove /dev/kmem for good") 2. Disallowed access to virtio-mem device memory via /dev/mem in commit 2128f4e21aa2 ("virtio-mem: disallow mapping virtio-mem memory via /dev/mem") 3. Sanitized access to virtio-mem device memory via /proc/kcore in commit 0daa322b8ff9 ("fs/proc/kcore: don't read offline sections, logically offline pages and hwpoisoned pages") 4. Sanitized access to virtio-mem device memory via /proc/vmcore in commit ce2814622e84 ("virtio-mem: kdump mode to sanitize /proc/vmcore access") "Accidential" access to unplugged memory is no longer possible; we can support the new VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE feature that will be required by some hypervisors implementing virtio-mem in the near future. Acked-by: Michael S. Tsirkin Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Marek Kedzierski Cc: Hui Zhu Cc: Sebastien Boeuf Cc: Pankaj Gupta Cc: Wei Yang Signed-off-by: David Hildenbrand --- include/uapi/linux/virtio_mem.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index 70e01c687d5e..e9122f1d0e0c 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -68,9 +68,10 @@ * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG). * * There are no guarantees what will happen if unplugged memory is - * read/written. Such memory should, in general, not be touched. E.g., - * even writing might succeed, but the values will simply be discarded at - * random points in time. + * read/written. In general, unplugged memory should not be touched, because + * the resulting action is undefined. There is one exception: without + * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable + * region can be read, to simplify creation of memory dumps. * * It can happen that the device cannot process a request, because it is * busy. The device driver has to retry later. @@ -87,6 +88,8 @@ /* node_id is an ACPI PXM and is valid */ #define VIRTIO_MEM_F_ACPI_PXM 0 +/* unplugged memory must not be accessed */ +#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE 1 /* --- virtio-mem: guest -> host requests --- */ -- cgit v1.2.3 From b56639318bb2be66aceba92836279714488709b4 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 21 Oct 2021 10:43:00 -0700 Subject: KVM: SEV: Add support for SEV intra host migration For SEV to work with intra host migration, contents of the SEV info struct such as the ASID (used to index the encryption key in the AMD SP) and the list of memory regions need to be transferred to the target VM. This change adds a commands for a target VMM to get a source SEV VM's sev info. Signed-off-by: Peter Gonda Suggested-by: Sean Christopherson Reviewed-by: Marc Orr Cc: Marc Orr Cc: Paolo Bonzini Cc: Sean Christopherson Cc: David Rientjes Cc: Dr. David Alan Gilbert Cc: Brijesh Singh Cc: Tom Lendacky Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Jim Mattson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: <20211021174303.385706-3-pgonda@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 78f0719cc2a3..1daa45268de2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From b5f57384805a34f497edb8b04d694a8a1b3d81d4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 10 Sep 2021 15:18:20 -0400 Subject: drm/amdkfd: Add sysfs bitfields and enums to uAPI These bits are de-facto part of the uAPI, so declare them in a uAPI header. The corresponding bit-fields and enums in user mode are defined in https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/master/include/hsakmttypes.h HSA_CAP_... -> HSA_CAPABILITY HSA_MEM_HEAP_TYPE_... -> HSA_HEAPTYPE HSA_MEM_FLAGS_... -> HSA_MEMORYPROPERTY HSA_CACHE_TYPE_... -> HsaCacheType HSA_IOLINK_TYPE_... -> HSA_IOLINKTYPE HSA_IOLINK_FLAGS_... -> HSA_LINKPROPERTY Signed-off-by: Felix Kuehling Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_sysfs.h | 108 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 include/uapi/linux/kfd_sysfs.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h new file mode 100644 index 000000000000..e1fb78b4bf09 --- /dev/null +++ b/include/uapi/linux/kfd_sysfs.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_SYSFS_H_INCLUDED +#define KFD_SYSFS_H_INCLUDED + +/* Capability bits in node properties */ +#define HSA_CAP_HOT_PLUGGABLE 0x00000001 +#define HSA_CAP_ATS_PRESENT 0x00000002 +#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 +#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 +#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 +#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 +#define HSA_CAP_VA_LIMIT 0x00000040 +#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 + +#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 +#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 +#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 + +/* Old buggy user mode depends on this being 0 */ +#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 + +#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 +#define HSA_CAP_RASEVENTNOTIFY 0x00200000 +#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 +#define HSA_CAP_ASIC_REVISION_SHIFT 22 +#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 +#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 +#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 +#define HSA_CAP_RESERVED 0xe00f8000 + +/* Heap types in memory properties */ +#define HSA_MEM_HEAP_TYPE_SYSTEM 0 +#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 +#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 +#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 +#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 +#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 + +/* Flag bits in memory properties */ +#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 +#define HSA_MEM_FLAGS_RESERVED 0xfffffffc + +/* Cache types in cache properties */ +#define HSA_CACHE_TYPE_DATA 0x00000001 +#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 +#define HSA_CACHE_TYPE_CPU 0x00000004 +#define HSA_CACHE_TYPE_HSACU 0x00000008 +#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 + +/* Link types in IO link properties (matches CRAT link types) */ +#define HSA_IOLINK_TYPE_UNDEFINED 0 +#define HSA_IOLINK_TYPE_HYPERTRANSPORT 1 +#define HSA_IOLINK_TYPE_PCIEXPRESS 2 +#define HSA_IOLINK_TYPE_AMBA 3 +#define HSA_IOLINK_TYPE_MIPI 4 +#define HSA_IOLINK_TYPE_QPI_1_1 5 +#define HSA_IOLINK_TYPE_RESERVED1 6 +#define HSA_IOLINK_TYPE_RESERVED2 7 +#define HSA_IOLINK_TYPE_RAPID_IO 8 +#define HSA_IOLINK_TYPE_INFINIBAND 9 +#define HSA_IOLINK_TYPE_RESERVED3 10 +#define HSA_IOLINK_TYPE_XGMI 11 +#define HSA_IOLINK_TYPE_XGOP 12 +#define HSA_IOLINK_TYPE_GZ 13 +#define HSA_IOLINK_TYPE_ETHERNET_RDMA 14 +#define HSA_IOLINK_TYPE_RDMA_OTHER 15 +#define HSA_IOLINK_TYPE_OTHER 16 + +/* Flag bits in IO link properties (matches CRAT flags, excluding the + * bi-directional flag, which is not offially part of the CRAT spec, and + * only used internally in KFD) + */ +#define HSA_IOLINK_FLAGS_ENABLED (1 << 0) +#define HSA_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define HSA_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define HSA_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define HSA_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define HSA_IOLINK_FLAGS_RESERVED 0xffffffe0 + +#endif -- cgit v1.2.3 From 72f6a45202f20f0e1a46b0acb7803369cc53d0b8 Mon Sep 17 00:00:00 2001 From: Xiayu Zhang Date: Wed, 1 Dec 2021 10:57:13 +0800 Subject: Fix Comment of ETH_P_802_3_MIN The description of ETH_P_802_3_MIN is misleading. The value of EthernetType in Ethernet II frame is more than 0x0600, the value of Length in 802.3 frame is less than 0x0600. Signed-off-by: Xiayu Zhang Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5da4ee234e0b..c0c2f3ed5729 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -117,7 +117,7 @@ #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ -#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value +#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is more than this value * then the frame is Ethernet II. Else it is 802.3 */ /* -- cgit v1.2.3 From 9dcc38e2813e0cd3b195940c98b181ce6ede8f20 Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Fri, 10 Dec 2021 14:46:09 -0800 Subject: Increase default MLOCK_LIMIT to 8 MiB This limit has not been updated since 2008, when it was increased to 64 KiB at the request of GnuPG. Until recently, the main use-cases for this feature were (1) preventing sensitive memory from being swapped, as in GnuPG's use-case; and (2) real-time use-cases. In the first case, little memory is called for, and in the second case, the user is generally in a position to increase it if they need more. The introduction of IOURING_REGISTER_BUFFERS adds a third use-case: preparing fixed buffers for high-performance I/O. This use-case will take as much of this memory as it can get, but is still limited to 64 KiB by default, which is very little. This increases the limit to 8 MB, which was chosen fairly arbitrarily as a more generous, but still conservative, default value. It is also possible to raise this limit in userspace. This is easily done, for example, in the use-case of a network daemon: systemd, for instance, provides for this via LimitMEMLOCK in the service file; OpenRC via the rc_ulimit variables. However, there is no established userspace facility for configuring this outside of daemons: end-user applications do not presently have access to a convenient means of raising their limits. The buck, as it were, stops with the kernel. It's much easier to address it here than it is to bring it to hundreds of distributions, and it can only realistically be relied upon to be high-enough by end-user software if it is more-or-less ubiquitous. Most distros don't change this particular rlimit from the kernel-supplied default value, so a change here will easily provide that ubiquity. Link: https://lkml.kernel.org/r/20211028080813.15966-1-sir@cmpwn.com Signed-off-by: Drew DeVault Acked-by: Jens Axboe Acked-by: Cyril Hrubis Acked-by: Johannes Weiner Cc: Pavel Begunkov Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Andrew Dona-Couch Cc: Ammar Faizi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/resource.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h index 74ef57b38f9f..ac5d6a3031db 100644 --- a/include/uapi/linux/resource.h +++ b/include/uapi/linux/resource.h @@ -66,10 +66,17 @@ struct rlimit64 { #define _STK_LIM (8*1024*1024) /* - * GPG2 wants 64kB of mlocked memory, to make sure pass phrases - * and other sensitive information are never written to disk. + * Limit the amount of locked memory by some sane default: + * root can always increase this limit if needed. + * + * The main use-cases are (1) preventing sensitive memory + * from being swapped; (2) real-time operations; (3) via + * IOURING_REGISTER_BUFFERS. + * + * The first two don't need much. The latter will take as + * much as it can get. 8MB is a reasonably sane default. */ -#define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024) +#define MLOCK_LIMIT (8*1024*1024) /* * Due to binary compatibility, the actual resource numbers -- cgit v1.2.3