From 571d91dcadfa3cef499010b4eddb9b58b0da4d24 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:19 -0700 Subject: perf: Add branch stack counters Currently, the additional information of a branch entry is stored in a u64 space. With more and more information added, the space is running out. For example, the information of occurrences of events will be added for each branch. Two places were suggested to append the counters. https://lore.kernel.org/lkml/20230802215814.GH231007@hirez.programming.kicks-ass.net/ One place is right after the flags of each branch entry. It changes the existing struct perf_branch_entry. The later ARCH specific implementation has to be really careful to consistently pick the right struct. The other place is right after the entire struct perf_branch_stack. The disadvantage is that the pointer of the extra space has to be recorded. The common interface perf_sample_save_brstack() has to be updated. The latter is much straightforward, and should be easily understood and maintained. It is implemented in the patch. Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate the event which is recorded in the branch info. The "u64 counters" may store the occurrences of several events. The information regarding the number of events/counters and the width of each counter should be exposed via sysfs as a reference for the perf tool. Define the branch_counter_nr and branch_counter_width ABI here. The support will be implemented later in the Intel-specific patch. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231025201626.3000228-1-kan.liang@linux.intel.com --- include/uapi/linux/perf_event.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 39c6a250dd1b..4461f380425b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -235,6 +237,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -982,6 +986,12 @@ enum perf_event_type { * { u64 nr; * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX * { u64 from, to, flags } lbr[nr]; + * # + * # The format of the counters is decided by the + * # "branch_counter_nr" and "branch_counter_width", + * # which are defined in the ABI. + * # + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi -- cgit v1.2.3 From 33744916196b4ed7a50f6f47af7c3ad46b730ce6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:23 -0700 Subject: perf/x86/intel: Support branch counters logging The branch counters logging (A.K.A LBR event logging) introduces a per-counter indication of precise event occurrences in LBRs. It can provide a means to attribute exposed retirement latency to combinations of events across a block of instructions. It also provides a means of attributing Timed LBR latencies to events. The feature is first introduced on SRF/GRR. It is an enhancement of the ARCH LBR. It adds new fields in the LBR_INFO MSRs to log the occurrences of events on the GP counters. The information is displayed by the order of counters. The design proposed in this patch requires that the events which are logged must be in a group with the event that has LBR. If there are more than one LBR group, the counters logging information only from the current group (overflowed) are stored for the perf tool, otherwise the perf tool cannot know which and when other groups are scheduled especially when multiplexing is triggered. The user can ensure it uses the maximum number of counters that support LBR info (4 by now) by making the group large enough. The HW only logs events by the order of counters. The order may be different from the order of enabling which the perf tool can understand. When parsing the information of each branch entry, convert the counter order to the enabled order, and store the enabled order in the extension space. Unconditionally reset LBRs for an LBR event group when it's deleted. The logged counter information is only valid for the current LBR group. If another LBR group is scheduled later, the information from the stale LBRs would be otherwise wrongly interpreted. Add a sanity check in intel_pmu_hw_config(). Disable the feature if other counter filters (inv, cmask, edge, in_tx) are set or LBR call stack mode is enabled. (For the LBR call stack mode, we cannot simply flush the LBR, since it will break the call stack. Also, there is no obvious usage with the call stack mode for now.) Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't require any branch stack setup. Expose the maximum number of supported counters and the width of the counters into the sysfs. The perf tool can use the information to parse the logged counters in each branch. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231025201626.3000228-5-kan.liang@linux.intel.com --- include/uapi/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4461f380425b..3a64499b0f5d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1437,6 +1437,9 @@ struct perf_branch_entry { reserved:31; }; +/* Size of used info bits in struct perf_branch_entry */ +#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 + union perf_sample_weight { __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) -- cgit v1.2.3 From 155addf0814a92d08fce26a11b27e3315cdba977 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 3 Nov 2023 19:49:00 -0700 Subject: bpf: Use named fields for certain bpf uapi structs Martin and Vadim reported a verifier failure with bpf_dynptr usage. The issue is mentioned but Vadim workarounded the issue with source change ([1]). The below describes what is the issue and why there is a verification failure. int BPF_PROG(skb_crypto_setup) { struct bpf_dynptr algo, key; ... bpf_dynptr_from_mem(..., ..., 0, &algo); ... } The bpf program is using vmlinux.h, so we have the following definition in vmlinux.h: struct bpf_dynptr { long: 64; long: 64; }; Note that in uapi header bpf.h, we have struct bpf_dynptr { long: 64; long: 64; } __attribute__((aligned(8))); So we lost alignment information for struct bpf_dynptr by using vmlinux.h. Let us take a look at a simple program below: $ cat align.c typedef unsigned long long __u64; struct bpf_dynptr_no_align { __u64 :64; __u64 :64; }; struct bpf_dynptr_yes_align { __u64 :64; __u64 :64; } __attribute__((aligned(8))); void bar(void *, void *); int foo() { struct bpf_dynptr_no_align a; struct bpf_dynptr_yes_align b; bar(&a, &b); return 0; } $ clang --target=bpf -O2 -S -emit-llvm align.c Look at the generated IR file align.ll: ... %a = alloca %struct.bpf_dynptr_no_align, align 1 %b = alloca %struct.bpf_dynptr_yes_align, align 8 ... The compiler dictates the alignment for struct bpf_dynptr_no_align is 1 and the alignment for struct bpf_dynptr_yes_align is 8. So theoretically compiler could allocate variable %a with alignment 1 although in reallity the compiler may choose a different alignment by considering other local variables. In [1], the verification failure happens because variable 'algo' is allocated on the stack with alignment 4 (fp-28). But the verifer wants its alignment to be 8. To fix the issue, the RFC patch ([1]) tried to add '__attribute__((aligned(8)))' to struct bpf_dynptr plus other similar structs. Andrii suggested that we could directly modify uapi struct with named fields like struct 'bpf_iter_num': struct bpf_iter_num { /* opaque iterator state; having __u64 here allows to preserve correct * alignment requirements in vmlinux.h, generated from BTF */ __u64 __opaque[1]; } __attribute__((aligned(8))); Indeed, adding named fields for those affected structs in this patch can preserve alignment when bpf program references them in vmlinux.h. With this patch, the verification failure in [1] can also be resolved. [1] https://lore.kernel.org/bpf/1b100f73-7625-4c1f-3ae5-50ecf84d3ff0@linux.dev/ [2] https://lore.kernel.org/bpf/20231103055218.2395034-1-yonghong.song@linux.dev/ Cc: Vadim Fedorenko Cc: Martin KaFai Lau Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231104024900.1539182-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1da..095ca7238ac2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { -- cgit v1.2.3 From b8e3a87a627b575896e448021e5c2f8a3bc19931 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 8 Nov 2023 03:23:34 -0800 Subject: bpf: Add crosstask check to __bpf_get_stack Currently get_perf_callchain only supports user stack walking for the current task. Passing the correct *crosstask* param will return 0 frames if the task passed to __bpf_get_stack isn't the current one instead of a single incorrect frame/address. This change passes the correct *crosstask* param but also does a preemptive check in __bpf_get_stack if the task is current and returns -EOPNOTSUPP if it is not. This issue was found using bpf_get_task_stack inside a BPF iterator ("iter/task"), which iterates over all tasks. bpf_get_task_stack works fine for fetching kernel stacks but because get_perf_callchain relies on the caller to know if the requested *task* is the current one (via *crosstask*) it was failing in a confusing way. It might be possible to get user stacks for all tasks utilizing something like access_process_vm but that requires the bpf program calling bpf_get_task_stack to be sleepable and would therefore be a breaking change. Fixes: fa28dcb82a38 ("bpf: Introduce helper bpf_get_task_stack()") Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231108112334.3433136-1-jordalgo@meta.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 095ca7238ac2..7cf8bcf9f6a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4517,6 +4517,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4530,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. -- cgit v1.2.3 From f3b8788cde61b02f1e6c202f8fac4360e6adbafc Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:46 -0700 Subject: LSM: Identify modules by more than name Create a struct lsm_id to contain identifying information about Linux Security Modules (LSMs). At inception this contains the name of the module and an identifier associated with the security module. Change the security_add_hooks() interface to use this structure. Change the individual modules to maintain their own struct lsm_id and pass it to security_add_hooks(). The values are for LSM identifiers are defined in a new UAPI header file linux/lsm.h. Each existing LSM has been updated to include it's LSMID in the lsm_id. The LSM ID values are sequential, with the oldest module LSM_ID_CAPABILITY being the lowest value and the existing modules numbered in the order they were included in the main line kernel. This is an arbitrary convention for assigning the values, but none better presents itself. The value 0 is defined as being invalid. The values 1-99 are reserved for any special case uses which may arise in the future. This may include attributes of the LSM infrastructure itself, possibly related to namespacing or network attribute management. A special range is identified for such attributes to help reduce confusion for developers unfamiliar with LSMs. LSM attribute values are defined for the attributes presented by modules that are available today. As with the LSM IDs, The value 0 is defined as being invalid. The values 1-99 are reserved for any special case uses which may arise in the future. Cc: linux-security-module Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: Mickael Salaun Reviewed-by: John Johansen Signed-off-by: Kees Cook Nacked-by: Tetsuo Handa [PM: forward ported beyond v6.6 due merge window changes] Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 include/uapi/linux/lsm.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h new file mode 100644 index 000000000000..f27c9a9cc376 --- /dev/null +++ b/include/uapi/linux/lsm.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Linux Security Modules (LSM) - User space API + * + * Copyright (C) 2022 Casey Schaufler + * Copyright (C) 2022 Intel Corporation + */ + +#ifndef _UAPI_LINUX_LSM_H +#define _UAPI_LINUX_LSM_H + +/* + * ID tokens to identify Linux Security Modules (LSMs) + * + * These token values are used to uniquely identify specific LSMs + * in the kernel as well as in the kernel's LSM userspace API. + * + * A value of zero/0 is considered undefined and should not be used + * outside the kernel. Values 1-99 are reserved for potential + * future use. + */ +#define LSM_ID_UNDEF 0 +#define LSM_ID_CAPABILITY 100 +#define LSM_ID_SELINUX 101 +#define LSM_ID_SMACK 102 +#define LSM_ID_TOMOYO 103 +#define LSM_ID_IMA 104 +#define LSM_ID_APPARMOR 105 +#define LSM_ID_YAMA 106 +#define LSM_ID_LOADPIN 107 +#define LSM_ID_SAFESETID 108 +#define LSM_ID_LOCKDOWN 109 +#define LSM_ID_BPF 110 +#define LSM_ID_LANDLOCK 111 + +/* + * LSM_ATTR_XXX definitions identify different LSM attributes + * which are used in the kernel's LSM userspace API. Support + * for these attributes vary across the different LSMs. None + * are required. + * + * A value of zero/0 is considered undefined and should not be used + * outside the kernel. Values 1-99 are reserved for potential + * future use. + */ +#define LSM_ATTR_UNDEF 0 +#define LSM_ATTR_CURRENT 100 +#define LSM_ATTR_EXEC 101 +#define LSM_ATTR_FSCREATE 102 +#define LSM_ATTR_KEYCREATE 103 +#define LSM_ATTR_PREV 104 +#define LSM_ATTR_SOCKCREATE 105 + +#endif /* _UAPI_LINUX_LSM_H */ -- cgit v1.2.3 From a04a1198088a1378d0389c250cc684f649bcc91e Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:49 -0700 Subject: LSM: syscalls for current process attributes Create a system call lsm_get_self_attr() to provide the security module maintained attributes of the current process. Create a system call lsm_set_self_attr() to set a security module maintained attribute of the current process. Historically these attributes have been exposed to user space via entries in procfs under /proc/self/attr. The attribute value is provided in a lsm_ctx structure. The structure identifies the size of the attribute, and the attribute value. The format of the attribute value is defined by the security module. A flags field is included for LSM specific information. It is currently unused and must be 0. The total size of the data, including the lsm_ctx structure and any padding, is maintained as well. struct lsm_ctx { __u64 id; __u64 flags; __u64 len; __u64 ctx_len; __u8 ctx[]; }; Two new LSM hooks are used to interface with the LSMs. security_getselfattr() collects the lsm_ctx values from the LSMs that support the hook, accounting for space requirements. security_setselfattr() identifies which LSM the attribute is intended for and passes it along. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: John Johansen Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index f27c9a9cc376..eeda59a77c02 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -9,6 +9,36 @@ #ifndef _UAPI_LINUX_LSM_H #define _UAPI_LINUX_LSM_H +#include +#include + +/** + * struct lsm_ctx - LSM context information + * @id: the LSM id number, see LSM_ID_XXX + * @flags: LSM specific flags + * @len: length of the lsm_ctx struct, @ctx and any other data or padding + * @ctx_len: the size of @ctx + * @ctx: the LSM context value + * + * The @len field MUST be equal to the size of the lsm_ctx struct + * plus any additional padding and/or data placed after @ctx. + * + * In all cases @ctx_len MUST be equal to the length of @ctx. + * If @ctx is a string value it should be nul terminated with + * @ctx_len equal to `strlen(@ctx) + 1`. Binary values are + * supported. + * + * The @flags and @ctx fields SHOULD only be interpreted by the + * LSM specified by @id; they MUST be set to zero/0 when not used. + */ +struct lsm_ctx { + __u64 id; + __u64 flags; + __u64 len; + __u64 ctx_len; + __u8 ctx[]; +}; + /* * ID tokens to identify Linux Security Modules (LSMs) * @@ -51,4 +81,10 @@ #define LSM_ATTR_PREV 104 #define LSM_ATTR_SOCKCREATE 105 +/* + * LSM_FLAG_XXX definitions identify special handling instructions + * for the API. + */ +#define LSM_FLAG_SINGLE 0x0001 + #endif /* _UAPI_LINUX_LSM_H */ -- cgit v1.2.3 From edd71f8e266c7ba15eedfec338864e53ddde1c25 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 18 Oct 2023 17:41:41 -0400 Subject: lsm: drop LSM_ID_IMA When IMA becomes a proper LSM we will reintroduce an appropriate LSM ID, but drop it from the userspace API for now in an effort to put an end to debates around the naming of the LSM ID macro. Reviewed-by: Roberto Sassu Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index eeda59a77c02..f0386880a78e 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -54,14 +54,13 @@ struct lsm_ctx { #define LSM_ID_SELINUX 101 #define LSM_ID_SMACK 102 #define LSM_ID_TOMOYO 103 -#define LSM_ID_IMA 104 -#define LSM_ID_APPARMOR 105 -#define LSM_ID_YAMA 106 -#define LSM_ID_LOADPIN 107 -#define LSM_ID_SAFESETID 108 -#define LSM_ID_LOCKDOWN 109 -#define LSM_ID_BPF 110 -#define LSM_ID_LANDLOCK 111 +#define LSM_ID_APPARMOR 104 +#define LSM_ID_YAMA 105 +#define LSM_ID_LOADPIN 106 +#define LSM_ID_SAFESETID 107 +#define LSM_ID_LOCKDOWN 108 +#define LSM_ID_BPF 109 +#define LSM_ID_LANDLOCK 110 /* * LSM_ATTR_XXX definitions identify different LSM attributes -- cgit v1.2.3 From bb58b90b1a8f753b582055adaf448214a8e22c31 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:50 -0700 Subject: KVM: Introduce KVM_SET_USER_MEMORY_REGION2 Introduce a "version 2" of KVM_SET_USER_MEMORY_REGION so that additional information can be supplied without setting userspace up to fail. The padding in the new kvm_userspace_memory_region2 structure will be used to pass a file descriptor in addition to the userspace_addr, i.e. allow userspace to point at a file descriptor and map memory into a guest that is NOT mapped into host userspace. Alternatively, KVM could simply add "struct kvm_userspace_memory_region2" without a new ioctl(), but as Paolo pointed out, adding a new ioctl() makes detection of bad flags a bit more robust, e.g. if the new fd field is guarded only by a flag and not a new ioctl(), then a userspace bug (setting a "bad" flag) would generate out-of-bounds access instead of an -EINVAL error. Cc: Jarkko Sakkinen Reviewed-by: Paolo Bonzini Reviewed-by: Xiaoyao Li Signed-off-by: Sean Christopherson Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Message-Id: <20231027182217.3615211-9-seanjc@google.com> Acked-by: Kai Huang Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 211b86de35ac..308cc70bd6ab 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -95,6 +95,16 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 userspace_addr; + __u64 pad[16]; +}; + /* * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for * userspace, other bits are reserved for kvm internal use which are defined @@ -1201,6 +1211,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 +#define KVM_CAP_USER_MEMORY2 231 #ifdef KVM_CAP_IRQ_ROUTING @@ -1483,6 +1494,8 @@ struct kvm_vfio_spapr_tce { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { -- cgit v1.2.3 From 16f95f3b95caded251a0440051e44a2fbe9e5f55 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:21:51 -0700 Subject: KVM: Add KVM_EXIT_MEMORY_FAULT exit to report faults to userspace Add a new KVM exit type to allow userspace to handle memory faults that KVM cannot resolve, but that userspace *may* be able to handle (without terminating the guest). KVM will initially use KVM_EXIT_MEMORY_FAULT to report implicit conversions between private and shared memory. With guest private memory, there will be two kind of memory conversions: - explicit conversion: happens when the guest explicitly calls into KVM to map a range (as private or shared) - implicit conversion: happens when the guest attempts to access a gfn that is configured in the "wrong" state (private vs. shared) On x86 (first architecture to support guest private memory), explicit conversions will be reported via KVM_EXIT_HYPERCALL+KVM_HC_MAP_GPA_RANGE, but reporting KVM_EXIT_HYPERCALL for implicit conversions is undesriable as there is (obviously) no hypercall, and there is no guarantee that the guest actually intends to convert between private and shared, i.e. what KVM thinks is an implicit conversion "request" could actually be the result of a guest code bug. KVM_EXIT_MEMORY_FAULT will be used to report memory faults that appear to be implicit conversions. Note! To allow for future possibilities where KVM reports KVM_EXIT_MEMORY_FAULT and fills run->memory_fault on _any_ unresolved fault, KVM returns "-EFAULT" (-1 with errno == EFAULT from userspace's perspective), not '0'! Due to historical baggage within KVM, exiting to userspace with '0' from deep callstacks, e.g. in emulation paths, is infeasible as doing so would require a near-complete overhaul of KVM, whereas KVM already propagates -errno return codes to userspace even when the -errno originated in a low level helper. Report the gpa+size instead of a single gfn even though the initial usage is expected to always report single pages. It's entirely possible, likely even, that KVM will someday support sub-page granularity faults, e.g. Intel's sub-page protection feature allows for additional protections at 128-byte granularity. Link: https://lore.kernel.org/all/20230908222905.1321305-5-amoorthy@google.com Link: https://lore.kernel.org/all/ZQ3AmLO2SYv3DszH@google.com Cc: Anish Moorthy Cc: David Matlack Suggested-by: Sean Christopherson Co-developed-by: Yu Zhang Signed-off-by: Yu Zhang Signed-off-by: Chao Peng Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Message-Id: <20231027182217.3615211-10-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 308cc70bd6ab..59010a685007 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -275,6 +275,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -528,6 +529,12 @@ struct kvm_run { #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) __u32 flags; } notify; + /* KVM_EXIT_MEMORY_FAULT */ + struct { + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; /* Fix the size of the union. */ char padding[256]; }; @@ -1212,6 +1219,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 5a475554db1e476a14216e742ea2bdb77362d5d5 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:21:55 -0700 Subject: KVM: Introduce per-page memory attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In confidential computing usages, whether a page is private or shared is necessary information for KVM to perform operations like page fault handling, page zapping etc. There are other potential use cases for per-page memory attributes, e.g. to make memory read-only (or no-exec, or exec-only, etc.) without having to modify memslots. Introduce the KVM_SET_MEMORY_ATTRIBUTES ioctl, advertised by KVM_CAP_MEMORY_ATTRIBUTES, to allow userspace to set the per-page memory attributes to a guest memory range. Use an xarray to store the per-page attributes internally, with a naive, not fully optimized implementation, i.e. prioritize correctness over performance for the initial implementation. Use bit 3 for the PRIVATE attribute so that KVM can use bits 0-2 for RWX attributes/protections in the future, e.g. to give userspace fine-grained control over read, write, and execute protections for guest memory. Provide arch hooks for handling attribute changes before and after common code sets the new attributes, e.g. x86 will use the "pre" hook to zap all relevant mappings, and the "post" hook to track whether or not hugepages can be used to map the range. To simplify the implementation wrap the entire sequence with kvm_mmu_invalidate_{begin,end}() even though the operation isn't strictly guaranteed to be an invalidation. For the initial use case, x86 *will* always invalidate memory, and preventing arch code from creating new mappings while the attributes are in flux makes it much easier to reason about the correctness of consuming attributes. It's possible that future usages may not require an invalidation, e.g. if KVM ends up supporting RWX protections and userspace grants _more_ protections, but again opt for simplicity and punt optimizations to if/when they are needed. Suggested-by: Sean Christopherson Link: https://lore.kernel.org/all/Y2WB48kD0J4VGynX@google.com Cc: Fuad Tabba Cc: Xu Yilun Cc: Mickaël Salaün Signed-off-by: Chao Peng Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-14-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 59010a685007..e8d167e54980 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1220,6 +1220,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_USER_MEMORY2 231 #define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 #ifdef KVM_CAP_IRQ_ROUTING @@ -2288,4 +2289,16 @@ struct kvm_s390_zpci_op { /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +/* Available with KVM_CAP_MEMORY_ATTRIBUTES */ +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; +}; + +#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 07afe1ba288c04280622fa002ed385f1ac0b6fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 7 Sep 2023 03:09:08 +0200 Subject: batman-adv: mcast: implement multicast packet reception and forwarding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement functionality to receive and forward a new TVLV capable multicast packet type. The new batman-adv multicast packet type allows to contain several originator destination addresses within a TVLV. Routers on the way will potentially split the batman-adv multicast packet and adjust its tracker TVLV contents. Routing decisions are still based on the selected BATMAN IV or BATMAN V routing algorithm. So this new batman-adv multicast packet type retains the same loop-free properties. Also a new OGM multicast TVLV flag is introduced to signal to other nodes that we are capable of handling a batman-adv multicast packet and multicast tracker TVLV. And that all of our hard interfaces have an MTU of at least 1280 bytes (IPv6 minimum MTU), as a simple solution for now to avoid MTU issues while forwarding. Signed-off-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 45 +++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 9204e4494b25..6e25753015df 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -116,6 +116,9 @@ enum batadv_icmp_packettype { * only need routable IPv4 multicast packets we signed up for explicitly * @BATADV_MCAST_WANT_NO_RTR6: we have no IPv6 multicast router and therefore * only need routable IPv6 multicast packets we signed up for explicitly + * @BATADV_MCAST_HAVE_MC_PTYPE_CAPA: we can parse, receive and forward + * batman-adv multicast packets with a multicast tracker TVLV. And all our + * hard interfaces have an MTU of at least 1280 bytes. */ enum batadv_mcast_flags { BATADV_MCAST_WANT_ALL_UNSNOOPABLES = 1UL << 0, @@ -123,6 +126,7 @@ enum batadv_mcast_flags { BATADV_MCAST_WANT_ALL_IPV6 = 1UL << 2, BATADV_MCAST_WANT_NO_RTR4 = 1UL << 3, BATADV_MCAST_WANT_NO_RTR6 = 1UL << 4, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA = 1UL << 5, }; /* tt data subtypes */ @@ -174,14 +178,16 @@ enum batadv_bla_claimframe { * @BATADV_TVLV_TT: translation table tvlv * @BATADV_TVLV_ROAM: roaming advertisement tvlv * @BATADV_TVLV_MCAST: multicast capability tvlv + * @BATADV_TVLV_MCAST_TRACKER: multicast tracker tvlv */ enum batadv_tvlv_type { - BATADV_TVLV_GW = 0x01, - BATADV_TVLV_DAT = 0x02, - BATADV_TVLV_NC = 0x03, - BATADV_TVLV_TT = 0x04, - BATADV_TVLV_ROAM = 0x05, - BATADV_TVLV_MCAST = 0x06, + BATADV_TVLV_GW = 0x01, + BATADV_TVLV_DAT = 0x02, + BATADV_TVLV_NC = 0x03, + BATADV_TVLV_TT = 0x04, + BATADV_TVLV_ROAM = 0x05, + BATADV_TVLV_MCAST = 0x06, + BATADV_TVLV_MCAST_TRACKER = 0x07, }; #pragma pack(2) @@ -487,6 +493,25 @@ struct batadv_bcast_packet { */ }; +/** + * struct batadv_mcast_packet - multicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header + * @reserved: reserved byte for alignment + * @tvlv_len: length of the appended tvlv buffer (in bytes) + */ +struct batadv_mcast_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 reserved; + __be16 tvlv_len; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + /** * struct batadv_coded_packet - network coded packet * @packet_type: batman-adv packet type, part of the general header @@ -628,6 +653,14 @@ struct batadv_tvlv_mcast_data { __u8 reserved[3]; }; +/** + * struct batadv_tvlv_mcast_tracker - payload of a multicast tracker tvlv + * @num_dests: number of subsequent destination originator MAC addresses + */ +struct batadv_tvlv_mcast_tracker { + __be16 num_dests; +}; + #pragma pack() #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */ -- cgit v1.2.3 From a7800aa80ea4d5356b8474c2302812e9d4926fa6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 13 Nov 2023 05:42:34 -0500 Subject: KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce an ioctl(), KVM_CREATE_GUEST_MEMFD, to allow creating file-based memory that is tied to a specific KVM virtual machine and whose primary purpose is to serve guest memory. A guest-first memory subsystem allows for optimizations and enhancements that are kludgy or outright infeasible to implement/support in a generic memory subsystem. With guest_memfd, guest protections and mapping sizes are fully decoupled from host userspace mappings. E.g. KVM currently doesn't support mapping memory as writable in the guest without it also being writable in host userspace, as KVM's ABI uses VMA protections to define the allow guest protection. Userspace can fudge this by establishing two mappings, a writable mapping for the guest and readable one for itself, but that’s suboptimal on multiple fronts. Similarly, KVM currently requires the guest mapping size to be a strict subset of the host userspace mapping size, e.g. KVM doesn’t support creating a 1GiB guest mapping unless userspace also has a 1GiB guest mapping. Decoupling the mappings sizes would allow userspace to precisely map only what is needed without impacting guest performance, e.g. to harden against unintentional accesses to guest memory. Decoupling guest and userspace mappings may also allow for a cleaner alternative to high-granularity mappings for HugeTLB, which has reached a bit of an impasse and is unlikely to ever be merged. A guest-first memory subsystem also provides clearer line of sight to things like a dedicated memory pool (for slice-of-hardware VMs) and elimination of "struct page" (for offload setups where userspace _never_ needs to mmap() guest memory). More immediately, being able to map memory into KVM guests without mapping said memory into the host is critical for Confidential VMs (CoCo VMs), the initial use case for guest_memfd. While AMD's SEV and Intel's TDX prevent untrusted software from reading guest private data by encrypting guest memory with a key that isn't usable by the untrusted host, projects such as Protected KVM (pKVM) provide confidentiality and integrity *without* relying on memory encryption. And with SEV-SNP and TDX, accessing guest private memory can be fatal to the host, i.e. KVM must be prevent host userspace from accessing guest memory irrespective of hardware behavior. Attempt #1 to support CoCo VMs was to add a VMA flag to mark memory as being mappable only by KVM (or a similarly enlightened kernel subsystem). That approach was abandoned largely due to it needing to play games with PROT_NONE to prevent userspace from accessing guest memory. Attempt #2 to was to usurp PG_hwpoison to prevent the host from mapping guest private memory into userspace, but that approach failed to meet several requirements for software-based CoCo VMs, e.g. pKVM, as the kernel wouldn't easily be able to enforce a 1:1 page:guest association, let alone a 1:1 pfn:gfn mapping. And using PG_hwpoison does not work for memory that isn't backed by 'struct page', e.g. if devices gain support for exposing encrypted memory regions to guests. Attempt #3 was to extend the memfd() syscall and wrap shmem to provide dedicated file-based guest memory. That approach made it as far as v10 before feedback from Hugh Dickins and Christian Brauner (and others) led to it demise. Hugh's objection was that piggybacking shmem made no sense for KVM's use case as KVM didn't actually *want* the features provided by shmem. I.e. KVM was using memfd() and shmem to avoid having to manage memory directly, not because memfd() and shmem were the optimal solution, e.g. things like read/write/mmap in shmem were dead weight. Christian pointed out flaws with implementing a partial overlay (wrapping only _some_ of shmem), e.g. poking at inode_operations or super_operations would show shmem stuff, but address_space_operations and file_operations would show KVM's overlay. Paraphrashing heavily, Christian suggested KVM stop being lazy and create a proper API. Link: https://lore.kernel.org/all/20201020061859.18385-1-kirill.shutemov@linux.intel.com Link: https://lore.kernel.org/all/20210416154106.23721-1-kirill.shutemov@linux.intel.com Link: https://lore.kernel.org/all/20210824005248.200037-1-seanjc@google.com Link: https://lore.kernel.org/all/20211111141352.26311-1-chao.p.peng@linux.intel.com Link: https://lore.kernel.org/all/20221202061347.1070246-1-chao.p.peng@linux.intel.com Link: https://lore.kernel.org/all/ff5c5b97-acdf-9745-ebe5-c6609dd6322e@google.com Link: https://lore.kernel.org/all/20230418-anfallen-irdisch-6993a61be10b@brauner Link: https://lore.kernel.org/all/ZEM5Zq8oo+xnApW9@google.com Link: https://lore.kernel.org/linux-mm/20230306191944.GA15773@monkey Link: https://lore.kernel.org/linux-mm/ZII1p8ZHlHaQ3dDl@casper.infradead.org Cc: Fuad Tabba Cc: Vishal Annapurve Cc: Ackerley Tng Cc: Jarkko Sakkinen Cc: Maciej Szmigiero Cc: Vlastimil Babka Cc: David Hildenbrand Cc: Quentin Perret Cc: Michael Roth Cc: Wang Cc: Liam Merwick Cc: Isaku Yamahata Co-developed-by: Kirill A. Shutemov Signed-off-by: Kirill A. Shutemov Co-developed-by: Yu Zhang Signed-off-by: Yu Zhang Co-developed-by: Chao Peng Signed-off-by: Chao Peng Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Co-developed-by: Isaku Yamahata Signed-off-by: Isaku Yamahata Co-developed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-17-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e8d167e54980..2802d10aa88c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -102,7 +102,10 @@ struct kvm_userspace_memory_region2 { __u64 guest_phys_addr; __u64 memory_size; __u64 userspace_addr; - __u64 pad[16]; + __u64 guest_memfd_offset; + __u32 guest_memfd; + __u32 pad1; + __u64 pad2[14]; }; /* @@ -112,6 +115,7 @@ struct kvm_userspace_memory_region2 { */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_GUEST_MEMFD (1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -1221,6 +1225,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_USER_MEMORY2 231 #define KVM_CAP_MEMORY_FAULT_INFO 232 #define KVM_CAP_MEMORY_ATTRIBUTES 233 +#define KVM_CAP_GUEST_MEMFD 234 #ifdef KVM_CAP_IRQ_ROUTING @@ -2301,4 +2306,12 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) +#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + +struct kvm_create_guest_memfd { + __u64 size; + __u64 flags; + __u64 reserved[6]; +}; + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 8dd2eee9d526c30fccfe75da7ec5365c6476e510 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:22:02 -0700 Subject: KVM: x86/mmu: Handle page fault for private memory Add support for resolving page faults on guest private memory for VMs that differentiate between "shared" and "private" memory. For such VMs, KVM_MEM_GUEST_MEMFD memslots can include both fd-based private memory and hva-based shared memory, and KVM needs to map in the "correct" variant, i.e. KVM needs to map the gfn shared/private as appropriate based on the current state of the gfn's KVM_MEMORY_ATTRIBUTE_PRIVATE flag. For AMD's SEV-SNP and Intel's TDX, the guest effectively gets to request shared vs. private via a bit in the guest page tables, i.e. what the guest wants may conflict with the current memory attributes. To support such "implicit" conversion requests, exit to user with KVM_EXIT_MEMORY_FAULT to forward the request to userspace. Add a new flag for memory faults, KVM_MEMORY_EXIT_FLAG_PRIVATE, to communicate whether the guest wants to map memory as shared vs. private. Like KVM_MEMORY_ATTRIBUTE_PRIVATE, use bit 3 for flagging private memory so that KVM can use bits 0-2 for capturing RWX behavior if/when userspace needs such information, e.g. a likely user of KVM_EXIT_MEMORY_FAULT is to exit on missing mappings when handling guest page fault VM-Exits. In that case, userspace will want to know RWX information in order to correctly/precisely resolve the fault. Note, private memory *must* be backed by guest_memfd, i.e. shared mappings always come from the host userspace page tables, and private mappings always come from a guest_memfd instance. Co-developed-by: Yu Zhang Signed-off-by: Yu Zhang Signed-off-by: Chao Peng Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Message-Id: <20231027182217.3615211-21-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2802d10aa88c..8eb10f560c69 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -535,6 +535,7 @@ struct kvm_run { } notify; /* KVM_EXIT_MEMORY_FAULT */ struct { +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) __u64 flags; __u64 gpa; __u64 size; -- cgit v1.2.3 From 89ea60c2c7b5838bf192c50062d5720cd6ab8662 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:22:05 -0700 Subject: KVM: x86: Add support for "protected VMs" that can utilize private memory Add a new x86 VM type, KVM_X86_SW_PROTECTED_VM, to serve as a development and testing vehicle for Confidential (CoCo) VMs, and potentially to even become a "real" product in the distant future, e.g. a la pKVM. The private memory support in KVM x86 is aimed at AMD's SEV-SNP and Intel's TDX, but those technologies are extremely complex (understatement), difficult to debug, don't support running as nested guests, and require hardware that's isn't universally accessible. I.e. relying SEV-SNP or TDX for maintaining guest private memory isn't a realistic option. At the very least, KVM_X86_SW_PROTECTED_VM will enable a variety of selftests for guest_memfd and private memory support without requiring unique hardware. Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Message-Id: <20231027182217.3615211-24-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8eb10f560c69..e9cb2df67a1d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1227,6 +1227,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_MEMORY_FAULT_INFO 232 #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 +#define KVM_CAP_VM_TYPES 235 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 5f99f312bd3bedb3b266b0d26376a8c500cdc97f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:00 -0800 Subject: bpf: add register bounds sanity checks and sanitization Add simple sanity checks that validate well-formed ranges (min <= max) across u64, s64, u32, and s32 ranges. Also for cases when the value is constant (either 64-bit or 32-bit), we validate that ranges and tnums are in agreement. These bounds checks are performed at the end of BPF_ALU/BPF_ALU64 operations, on conditional jumps, and for LDX instructions (where subreg zero/sign extension is probably the most important to check). This covers most of the interesting cases. Also, we validate the sanity of the return register when manually adjusting it for some special helpers. By default, sanity violation will trigger a warning in verifier log and resetting register bounds to "unbounded" ones. But to aid development and debugging, BPF_F_TEST_SANITY_STRICT flag is added, which will trigger hard failure of verification with -EFAULT on register bounds violations. This allows selftests to catch such issues. veristat will also gain a CLI option to enable this behavior. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231112010609.848406-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7cf8bcf9f6a2..8a5855fcee69 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_SANITY_STRICT (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ -- cgit v1.2.3 From 5d33213fac5929a2e7766c88d78779fd443b0fe8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 10:39:24 +0300 Subject: media: v4l2-subdev: Fix a 64bit bug The problem is this line here from subdev_do_ioctl(). client_cap->capabilities &= ~V4L2_SUBDEV_CLIENT_CAP_STREAMS; The "client_cap->capabilities" variable is a u64. The AND operation is supposed to clear out the V4L2_SUBDEV_CLIENT_CAP_STREAMS flag. But because it's a 32 bit variable it accidentally clears out the high 32 bits as well. Currently we only use the first bit and none of the upper bits so this doesn't affect runtime behavior. Fixes: f57fa2959244 ("media: v4l2-subdev: Add new ioctl for client capabilities") Signed-off-by: Dan Carpenter Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-subdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 4a195b68f28f..b383c2fe0cf3 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -239,7 +239,7 @@ struct v4l2_subdev_routing { * set (which is the default), the 'stream' fields will be forced to 0 by the * kernel. */ - #define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1U << 0) + #define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1ULL << 0) /** * struct v4l2_subdev_client_capability - Capabilities of the client accessing -- cgit v1.2.3 From c6e9dba3be5ef3b701b29b143609561915e5d0e9 Mon Sep 17 00:00:00 2001 From: Alce Lafranque Date: Tue, 14 Nov 2023 11:36:57 -0600 Subject: vxlan: add support for flowlabel inherit By default, VXLAN encapsulation over IPv6 sets the flow label to 0, with an option for a fixed value. This commits add the ability to inherit the flow label from the inner packet, like for other tunnel implementations. This enables devices using only L3 headers for ECMP to correctly balance VXLAN-encapsulated IPv6 packets. ``` $ ./ip/ip link add dummy1 type dummy $ ./ip/ip addr add 2001:db8::2/64 dev dummy1 $ ./ip/ip link set up dev dummy1 $ ./ip/ip link add vxlan1 type vxlan id 100 flowlabel inherit remote 2001:db8::1 local 2001:db8::2 $ ./ip/ip link set up dev vxlan1 $ ./ip/ip addr add 2001:db8:1::2/64 dev vxlan1 $ ./ip/ip link set arp off dev vxlan1 $ ping -q 2001:db8:1::1 & $ tshark -d udp.port==8472,vxlan -Vpni dummy1 -c1 [...] Internet Protocol Version 6, Src: 2001:db8::2, Dst: 2001:db8::1 0110 .... = Version: 6 .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT) .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0) .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb [...] Virtual eXtensible Local Area Network Flags: 0x0800, VXLAN Network ID (VNI) Group Policy ID: 0 VXLAN Network Identifier (VNI): 100 [...] Internet Protocol Version 6, Src: 2001:db8:1::2, Dst: 2001:db8:1::1 0110 .... = Version: 6 .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT) .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0) .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb ``` Signed-off-by: Alce Lafranque Co-developed-by: Vincent Bernat Signed-off-by: Vincent Bernat Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 29ff80da2775..8181ef23a7a2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -856,6 +856,7 @@ enum { IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, + IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -873,6 +874,13 @@ enum ifla_vxlan_df { VXLAN_DF_MAX = __VXLAN_DF_END - 1, }; +enum ifla_vxlan_label_policy { + VXLAN_LABEL_FIXED = 0, + VXLAN_LABEL_INHERIT = 1, + __VXLAN_LABEL_END, + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, -- cgit v1.2.3 From ff8867af01daa7ea770bebf5f91199b7434b74e5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 09:14:04 -0800 Subject: bpf: rename BPF_F_TEST_SANITY_STRICT to BPF_F_TEST_REG_INVARIANTS Rename verifier internal flag BPF_F_TEST_SANITY_STRICT to more neutral BPF_F_TEST_REG_INVARIANTS. This is a follow up to [0]. A few selftests and veristat need to be adjusted in the same patch as well. [0] https://patchwork.kernel.org/project/netdevbpf/patch/20231112010609.848406-5-andrii@kernel.org/ Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231117171404.225508-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8a5855fcee69..7a5498242eaa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1201,7 +1201,7 @@ enum bpf_perf_event_type { #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) /* The verifier internal test flag. Behavior is undefined */ -#define BPF_F_TEST_SANITY_STRICT (1U << 7) +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. -- cgit v1.2.3 From 98d2b43081972abeb5bb5a087bc3e3197531c46e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 25 Oct 2023 16:01:59 +0200 Subject: add unique mount ID If a mount is released then its mnt_id can immediately be reused. This is bad news for user interfaces that want to uniquely identify a mount. Implementing a unique mount ID is trivial (use a 64bit counter). Unfortunately userspace assumes 32bit size and would overflow after the counter reaches 2^32. Introduce a new 64bit ID alongside the old one. Initialize the counter to 2^32, this guarantees that the old and new IDs are never mixed up. Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20231025140205.3586473-2-mszeredi@redhat.com Reviewed-by: Ian Kent Signed-off-by: Christian Brauner --- include/uapi/linux/stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7cab2c65d3d7..2f2ee82d5517 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -154,6 +154,7 @@ struct statx { #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ -- cgit v1.2.3 From acec05fb78abb74fdab2195bfca9a6d38a732643 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:35 +0100 Subject: net_tstamp: Add TIMESTAMPING SOFTWARE and HARDWARE mask Timestamping software or hardware flags are often used as a group, therefore adding these masks will easier future use. I did not use SOF_TIMESTAMPING_SYS_HARDWARE flag as it is deprecated and not use at all. Signed-off-by: Kory Maincent Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..df8091998c8d 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -48,6 +48,14 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) +#define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ + SOF_TIMESTAMPING_TX_SOFTWARE | \ + SOF_TIMESTAMPING_SOFTWARE) + +#define SOF_TIMESTAMPING_HARDWARE_MASK (SOF_TIMESTAMPING_RX_HARDWARE | \ + SOF_TIMESTAMPING_TX_HARDWARE | \ + SOF_TIMESTAMPING_RAW_HARDWARE) + /** * struct so_timestamping - SO_TIMESTAMPING parameter * -- cgit v1.2.3 From 11d55be06df0aedf19b05ab61c2d26b31a3c7e64 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:36 +0100 Subject: net: ethtool: Add a command to expose current time stamping layer Time stamping on network packets may happen either in the MAC or in the PHY, but not both. In preparation for making the choice selectable, expose both the current layers via ethtool. In accordance with the kernel implementation as it stands, the current layer will always read as "phy" when a PHY time stamping device is present. Future patches will allow changing the current layer administratively. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 14 ++++++++++++++ include/uapi/linux/net_tstamp.h | 10 ++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 73e2c10dc2cc..cb51136328cf 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_TS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,7 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_TS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -975,6 +977,18 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +/* TS LAYER */ + +enum { + ETHTOOL_A_TS_UNSPEC, + ETHTOOL_A_TS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TS_LAYER, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TS_CNT, + ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index df8091998c8d..4551fb3d7720 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,6 +13,16 @@ #include #include /* for SO_TIMESTAMPING */ +/* Layer of the TIMESTAMPING provider */ +enum timestamping_layer { + NO_TIMESTAMPING, + SOFTWARE_TIMESTAMPING, + MAC_TIMESTAMPING, + PHY_TIMESTAMPING, + + __TIMESTAMPING_COUNT, +}; + /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), -- cgit v1.2.3 From d905f9c753295ee5a30af265f4b724f10050e7d3 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:38 +0100 Subject: net: ethtool: Add a command to list available time stamping layers Introduce a new netlink message that lists all available time stamping layers on a given interface. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index cb51136328cf..62b885d44d06 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -58,6 +58,7 @@ enum { ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_TS_GET, + ETHTOOL_MSG_TS_LIST_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -111,6 +112,7 @@ enum { ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, ETHTOOL_MSG_TS_GET_REPLY, + ETHTOOL_MSG_TS_LIST_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -989,6 +991,18 @@ enum { ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1) }; +/* TS LIST LAYER */ + +enum { + ETHTOOL_A_TS_LIST_UNSPEC, + ETHTOOL_A_TS_LIST_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TS_LIST_LAYER, /* array, u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TS_LIST_CNT, + ETHTOOL_A_TS_LIST_MAX = (__ETHTOOL_A_TS_LIST_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 152c75e1d00200edc4da1beb67dd099a462ea86b Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:43 +0100 Subject: net: ethtool: ts: Let the active time stamping layer be selectable Now that the current timestamp is saved in a variable lets add the ETHTOOL_MSG_TS_SET ethtool netlink socket to make it selectable. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 62b885d44d06..df6c4fcc62c1 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -59,6 +59,7 @@ enum { ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_TS_GET, ETHTOOL_MSG_TS_LIST_GET, + ETHTOOL_MSG_TS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, -- cgit v1.2.3 From 289354f21b2c3fac93e956efd45f256a88a4d997 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Nov 2023 18:38:05 -0800 Subject: net: partial revert of the "Make timestamping selectable: series Revert following commits: commit acec05fb78ab ("net_tstamp: Add TIMESTAMPING SOFTWARE and HARDWARE mask") commit 11d55be06df0 ("net: ethtool: Add a command to expose current time stamping layer") commit bb8645b00ced ("netlink: specs: Introduce new netlink command to get current timestamp") commit d905f9c75329 ("net: ethtool: Add a command to list available time stamping layers") commit aed5004ee7a0 ("netlink: specs: Introduce new netlink command to list available time stamping layers") commit 51bdf3165f01 ("net: Replace hwtstamp_source by timestamping layer") commit 0f7f463d4821 ("net: Change the API of PHY default timestamp to MAC") commit 091fab122869 ("net: ethtool: ts: Update GET_TS to reply the current selected timestamp") commit 152c75e1d002 ("net: ethtool: ts: Let the active time stamping layer be selectable") commit ee60ea6be0d3 ("netlink: specs: Introduce time stamping set command") They need more time for reviews. Link: https://lore.kernel.org/all/20231118183529.6e67100c@kernel.org/ Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 29 ----------------------------- include/uapi/linux/net_tstamp.h | 18 ------------------ 2 files changed, 47 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index df6c4fcc62c1..73e2c10dc2cc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,9 +57,6 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_TS_GET, - ETHTOOL_MSG_TS_LIST_GET, - ETHTOOL_MSG_TS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -112,8 +109,6 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_TS_GET_REPLY, - ETHTOOL_MSG_TS_LIST_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -980,30 +975,6 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -/* TS LAYER */ - -enum { - ETHTOOL_A_TS_UNSPEC, - ETHTOOL_A_TS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TS_LAYER, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TS_CNT, - ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1) -}; - -/* TS LIST LAYER */ - -enum { - ETHTOOL_A_TS_LIST_UNSPEC, - ETHTOOL_A_TS_LIST_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TS_LIST_LAYER, /* array, u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TS_LIST_CNT, - ETHTOOL_A_TS_LIST_MAX = (__ETHTOOL_A_TS_LIST_CNT - 1) -}; - /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 4551fb3d7720..a2c66b3d7f0f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,16 +13,6 @@ #include #include /* for SO_TIMESTAMPING */ -/* Layer of the TIMESTAMPING provider */ -enum timestamping_layer { - NO_TIMESTAMPING, - SOFTWARE_TIMESTAMPING, - MAC_TIMESTAMPING, - PHY_TIMESTAMPING, - - __TIMESTAMPING_COUNT, -}; - /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), @@ -58,14 +48,6 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) -#define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ - SOF_TIMESTAMPING_TX_SOFTWARE | \ - SOF_TIMESTAMPING_SOFTWARE) - -#define SOF_TIMESTAMPING_HARDWARE_MASK (SOF_TIMESTAMPING_RX_HARDWARE | \ - SOF_TIMESTAMPING_TX_HARDWARE | \ - SOF_TIMESTAMPING_RAW_HARDWARE) - /** * struct so_timestamping - SO_TIMESTAMPING parameter * -- cgit v1.2.3 From d055a76c006540defd4eb80dcdea217cee0a141a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 9 Nov 2023 17:35:00 +0100 Subject: media: core: Report the maximum possible number of buffers for the queue Use one of the struct v4l2_create_buffers reserved bytes to report the maximum possible number of buffers for the queue. V4l2 framework set V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS flags in queue capabilities so userland can know when the field is valid. Does the same change in v4l2_create_buffers32 structure. Signed-off-by: Benjamin Gaignard Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c3d4e490ce7c..13ddb5abf584 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1035,6 +1035,7 @@ struct v4l2_requestbuffers { #define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4) #define V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF (1 << 5) #define V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS (1 << 6) +#define V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS (1 << 7) /** * struct v4l2_plane - plane info for multi-planar buffers @@ -2605,6 +2606,9 @@ struct v4l2_dbg_chip_info { * @flags: additional buffer management attributes (ignored unless the * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability * and configured for MMAP streaming I/O). + * @max_num_buffers: if V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS capability flag is set + * this field indicate the maximum possible number of buffers + * for this queue. * @reserved: future extensions */ struct v4l2_create_buffers { @@ -2614,7 +2618,8 @@ struct v4l2_create_buffers { struct v4l2_format format; __u32 capabilities; __u32 flags; - __u32 reserved[6]; + __u32 max_num_buffers; + __u32 reserved[5]; }; /* -- cgit v1.2.3 From 70be8a84017a4454429abc40dec40a1f845c7827 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 14 Nov 2023 11:39:44 +0100 Subject: media: videodev2.h: add missing __user to p_h264_pps The p_h264_pps pointer in struct v4l2_ext_control was missing the __user annotation. Add this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 13ddb5abf584..c3fc710ef9a7 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1817,7 +1817,7 @@ struct v4l2_ext_control { __s64 __user *p_s64; struct v4l2_area __user *p_area; struct v4l2_ctrl_h264_sps __user *p_h264_sps; - struct v4l2_ctrl_h264_pps *p_h264_pps; + struct v4l2_ctrl_h264_pps __user *p_h264_pps; struct v4l2_ctrl_h264_scaling_matrix __user *p_h264_scaling_matrix; struct v4l2_ctrl_h264_pred_weights __user *p_h264_pred_weights; struct v4l2_ctrl_h264_slice_params __user *p_h264_slice_params; -- cgit v1.2.3 From 26846dda3eca07cbb8dd481421ae52b31ef232d5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 14 Nov 2023 11:50:36 +0100 Subject: media: videodev.h: add missing p_hdr10_* pointers The HDR10 standard compound controls were missing the corresponding pointers in videodev2.h. Add these and document them. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c3fc710ef9a7..68e7ac178cc2 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1838,6 +1838,8 @@ struct v4l2_ext_control { struct v4l2_ctrl_av1_tile_group_entry __user *p_av1_tile_group_entry; struct v4l2_ctrl_av1_frame __user *p_av1_frame; struct v4l2_ctrl_av1_film_grain __user *p_av1_film_grain; + struct v4l2_ctrl_hdr10_cll_info __user *p_hdr10_cll_info; + struct v4l2_ctrl_hdr10_mastering_display __user *p_hdr10_mastering_display; void __user *ptr; }; } __attribute__ ((packed)); -- cgit v1.2.3 From 6285ee30caa1a0fbd9537496578085c143127eee Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 13 Nov 2023 11:35:00 +0200 Subject: wifi: cfg80211: Extend support for scanning while MLO connected To extend the support of TSF accounting in scan results for MLO connections, allow to indicate in the scan request the link ID corresponding to the BSS whose TSF should be used for the TSF accounting. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231113112844.d4490bcdefb1.I8fcd158b810adddef4963727e9153096416b30ce@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index dced2c49daec..03e44823355e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6241,9 +6241,11 @@ enum nl80211_feature_flags { * the BSS that the interface that requested the scan is connected to * (if available). * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the - * time the last beacon/probe was received. The time is the TSF of the - * BSS that the interface that requested the scan is connected to - * (if available). + * time the last beacon/probe was received. For a non MLO connection, the + * time is the TSF of the BSS that the interface that requested the scan is + * connected to (if available). For an MLO connection, the time is the TSF + * of the BSS corresponding with link ID specified in the scan request (if + * specified). * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of * channel dwell time. * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate -- cgit v1.2.3 From 0cc3f50f42d262d6175ee2834aeb56e98934cfcc Mon Sep 17 00:00:00 2001 From: Vinayak Yadawad Date: Thu, 9 Nov 2023 12:03:44 +0530 Subject: wifi: nl80211: Documentation update for NL80211_CMD_PORT_AUTHORIZED event Drivers supporting 4-way handshake offload for AP/P2p-GO and STA/P2P-client should use this event to indicate that port has been authorized and open for regular data traffic, sending this event on completion of successful 4-way handshake. Signed-off-by: Vinayak Yadawad Link: https://lore.kernel.org/r/f746b59f41436e9df29c24688035fbc6eb91ab06.1699510229.git.vinayak.yadawad@broadcom.com [rewrite it all to not use the term 'GC' that we don't use in place of P2P-client] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 03e44823355e..0cd1da2c2902 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1135,11 +1135,15 @@ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously * configured PMK for the authenticator address identified by * %NL80211_ATTR_MAC. - * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates an 802.1X FT roam was - * completed successfully. Drivers that support 4 way handshake offload - * should send this event after indicating 802.1X FT assocation with - * %NL80211_CMD_ROAM. If the 4 way handshake failed %NL80211_CMD_DISCONNECT - * should be indicated instead. + * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates port is authorized and + * open for regular data traffic. For STA/P2P-client, this event is sent + * with AP MAC address and for AP/P2P-GO, the event carries the STA/P2P- + * client MAC address. + * Drivers that support 4 way handshake offload should send this event for + * STA/P2P-client after successful 4-way HS or after 802.1X FT following + * NL80211_CMD_CONNECT or NL80211_CMD_ROAM. Drivers using AP/P2P-GO 4-way + * handshake offload should send this event on successful completion of + * 4-way handshake with the peer (STA/P2P-client). * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request * and RX notification. This command is used both as a request to transmit * a control port frame and as a notification that a control port frame -- cgit v1.2.3 From 4e86f32a13af1970d21be94f659cae56bbe487ee Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 20 Nov 2023 14:05:08 +0300 Subject: uapi: propagate __struct_group() attributes to the container union Recently the kernel test robot has reported an ARM-specific BUILD_BUG_ON() in an old and unmaintained wil6210 wireless driver. The problem comes from the structure packing rules of old ARM ABI ('-mabi=apcs-gnu'). For example, the following structure is packed to 18 bytes instead of 16: struct poorly_packed { unsigned int a; unsigned int b; unsigned short c; union { struct { unsigned short d; unsigned int e; } __attribute__((packed)); struct { unsigned short d; unsigned int e; } __attribute__((packed)) inner; }; } __attribute__((packed)); To fit it into 16 bytes, it's required to add packed attribute to the container union as well: struct poorly_packed { unsigned int a; unsigned int b; unsigned short c; union { struct { unsigned short d; unsigned int e; } __attribute__((packed)); struct { unsigned short d; unsigned int e; } __attribute__((packed)) inner; } __attribute__((packed)); } __attribute__((packed)); Thanks to Andrew Pinski of GCC team for sorting the things out at https://gcc.gnu.org/pipermail/gcc/2023-November/242888.html. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311150821.cI4yciFE-lkp@intel.com Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20231120110607.98956-1-dmantipov@yandex.ru Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 5c6c4269f7ef..2ec6f35cda32 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -27,7 +27,7 @@ union { \ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ - } + } ATTRS #ifdef __cplusplus /* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ -- cgit v1.2.3 From 950ab53b77ab829defeb22bc98d40a5e926ae018 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:34 -0800 Subject: net: page_pool: implement GET in the netlink API Expose the very basic page pool information via netlink. Example using ynl-py for a system with 9 queues: $ ./cli.py --no-schema --spec netlink/specs/netdev.yaml \ --dump page-pool-get [{'id': 19, 'ifindex': 2, 'napi-id': 147}, {'id': 18, 'ifindex': 2, 'napi-id': 146}, {'id': 17, 'ifindex': 2, 'napi-id': 145}, {'id': 16, 'ifindex': 2, 'napi-id': 144}, {'id': 15, 'ifindex': 2, 'napi-id': 143}, {'id': 14, 'ifindex': 2, 'napi-id': 142}, {'id': 13, 'ifindex': 2, 'napi-id': 141}, {'id': 12, 'ifindex': 2, 'napi-id': 140}, {'id': 11, 'ifindex': 2, 'napi-id': 139}, {'id': 10, 'ifindex': 2, 'napi-id': 138}] Reviewed-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 2943a151d4f1..176665bcf0da 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -64,11 +64,21 @@ enum { NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) }; +enum { + NETDEV_A_PAGE_POOL_ID = 1, + NETDEV_A_PAGE_POOL_IFINDEX, + NETDEV_A_PAGE_POOL_NAPI_ID, + + __NETDEV_A_PAGE_POOL_MAX, + NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, NETDEV_CMD_DEV_DEL_NTF, NETDEV_CMD_DEV_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From d2ef6aa077bdd0b3495dba5dcae6d3f19579b20b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:35 -0800 Subject: net: page_pool: add netlink notifications for state changes Generate netlink notifications about page pool state changes. Reviewed-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 176665bcf0da..beb158872226 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -79,11 +79,15 @@ enum { NETDEV_CMD_DEV_DEL_NTF, NETDEV_CMD_DEV_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_GET, + NETDEV_CMD_PAGE_POOL_ADD_NTF, + NETDEV_CMD_PAGE_POOL_DEL_NTF, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) }; #define NETDEV_MCGRP_MGMT "mgmt" +#define NETDEV_MCGRP_PAGE_POOL "page-pool" #endif /* _UAPI_LINUX_NETDEV_H */ -- cgit v1.2.3 From 7aee8429eedd0970d8add2fb5b856bfc5f5f1fc1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:36 -0800 Subject: net: page_pool: report amount of memory held by page pools Advanced deployments need the ability to check memory use of various system components. It makes it possible to make informed decisions about memory allocation and to find regressions and leaks. Report memory use of page pools. Report both number of references and bytes held. Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index beb158872226..26ae5bdd3187 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -68,6 +68,8 @@ enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, + NETDEV_A_PAGE_POOL_INFLIGHT, + NETDEV_A_PAGE_POOL_INFLIGHT_MEM, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) -- cgit v1.2.3 From 69cb4952b6f6a226c1c0a7ca400398aaa8f75cf2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:37 -0800 Subject: net: page_pool: report when page pool was destroyed Report when page pool was destroyed. Together with the inflight / memory use reporting this can serve as a replacement for the warning about leaked page pools we currently print to dmesg. Example output for a fake leaked page pool using some hacks in netdevsim (one "live" pool, and one "leaked" on the same dev): $ ./cli.py --no-schema --spec netlink/specs/netdev.yaml \ --dump page-pool-get [{'id': 2, 'ifindex': 3}, {'id': 1, 'ifindex': 3, 'destroyed': 133, 'inflight': 1}] Tested-by: Dragos Tatulea Reviewed-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 26ae5bdd3187..756410274120 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -70,6 +70,7 @@ enum { NETDEV_A_PAGE_POOL_NAPI_ID, NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + NETDEV_A_PAGE_POOL_DETACH_TIME, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) -- cgit v1.2.3 From d49010adae737638447369a4eff8f1aab736b076 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:38 -0800 Subject: net: page_pool: expose page pool stats via netlink Dump the stats into netlink. More clever approaches like dumping the stats per-CPU for each CPU individually to see where the packets get consumed can be implemented in the future. A trimmed example from a real (but recently booted system): $ ./cli.py --no-schema --spec netlink/specs/netdev.yaml \ --dump page-pool-stats-get [{'info': {'id': 19, 'ifindex': 2}, 'alloc-empty': 48, 'alloc-fast': 3024, 'alloc-refill': 0, 'alloc-slow': 48, 'alloc-slow-high-order': 0, 'alloc-waive': 0, 'recycle-cache-full': 0, 'recycle-cached': 0, 'recycle-released-refcnt': 0, 'recycle-ring': 0, 'recycle-ring-full': 0}, {'info': {'id': 18, 'ifindex': 2}, 'alloc-empty': 66, 'alloc-fast': 11811, 'alloc-refill': 35, 'alloc-slow': 66, 'alloc-slow-high-order': 0, 'alloc-waive': 0, 'recycle-cache-full': 1145, 'recycle-cached': 6541, 'recycle-released-refcnt': 0, 'recycle-ring': 1275, 'recycle-ring-full': 0}, {'info': {'id': 17, 'ifindex': 2}, 'alloc-empty': 73, 'alloc-fast': 62099, 'alloc-refill': 413, ... Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 756410274120..2b37233e00c0 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -76,6 +76,24 @@ enum { NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) }; +enum { + NETDEV_A_PAGE_POOL_STATS_INFO = 1, + NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST = 8, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + + __NETDEV_A_PAGE_POOL_STATS_MAX, + NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -85,6 +103,7 @@ enum { NETDEV_CMD_PAGE_POOL_ADD_NTF, NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_STATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From b9873755a6c8ccfce79094c4dce9efa3ecb1a749 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Oct 2023 21:35:22 +0000 Subject: misc: Add Nitro Secure Module driver When running Linux inside a Nitro Enclave, the hypervisor provides a special virtio device called "Nitro Security Module" (NSM). This device has 3 main functions: 1) Provide attestation reports 2) Modify PCR state 3) Provide entropy This patch adds a driver for NSM that exposes a /dev/nsm device node which user space can issue an ioctl on this device with raw NSM CBOR formatted commands to request attestation documents, influence PCR states, read entropy and enumerate status of the device. In addition, the driver implements a hwrng backend. Originally-by: Petre Eftime Signed-off-by: Alexander Graf Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231011213522.51781-1-graf@amazon.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/nsm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/uapi/linux/nsm.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nsm.h b/include/uapi/linux/nsm.h new file mode 100644 index 000000000000..e529f232f6c0 --- /dev/null +++ b/include/uapi/linux/nsm.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + */ + +#ifndef __UAPI_LINUX_NSM_H +#define __UAPI_LINUX_NSM_H + +#include +#include + +#define NSM_MAGIC 0x0A + +#define NSM_REQUEST_MAX_SIZE 0x1000 +#define NSM_RESPONSE_MAX_SIZE 0x3000 + +struct nsm_iovec { + __u64 addr; /* Virtual address of target buffer */ + __u64 len; /* Length of target buffer */ +}; + +/* Raw NSM message. Only available with CAP_SYS_ADMIN. */ +struct nsm_raw { + /* Request from user */ + struct nsm_iovec request; + /* Response to user */ + struct nsm_iovec response; +}; +#define NSM_IOCTL_RAW _IOWR(NSM_MAGIC, 0x0, struct nsm_raw) + +#endif /* __UAPI_LINUX_NSM_H */ -- cgit v1.2.3 From e56fdbfb06e26a7066b070967badef4148528df2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:27 +0100 Subject: bpf: Add link_info support for uprobe multi link Adding support to get uprobe_link details through bpf_link_info interface. Adding new struct uprobe_multi to struct bpf_link_info to carry the uprobe_multi link details. The uprobe_multi.count is passed from user space to denote size of array fields (offsets/ref_ctr_offsets/cookies). The actual array size is stored back to uprobe_multi.count (allowing user to find out the actual array size) and array fields are populated up to the user passed size. All the non-array fields (path/count/flags/pid) are always set. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231125193130.834322-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a5498242eaa..e88746ba7d21 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6562,6 +6562,16 @@ struct bpf_link_info { __u32 flags; __u64 missed; } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ __u32 :32; -- cgit v1.2.3 From 341ac980eab90ac1f6c22ee9f9da83ed9604d899 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:07 -0800 Subject: xsk: Support tx_metadata_len For zerocopy mode, tx_desc->addr can point to an arbitrary offset and carry some TX metadata in the headroom. For copy mode, there is no way currently to populate skb metadata. Introduce new tx_metadata_len umem config option that indicates how many bytes to treat as metadata. Metadata bytes come prior to tx_desc address (same as in RX case). The size of the metadata has mostly the same constraints as XDP: - less than 256 bytes - 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte timestamp in the completion) - non-zero This data is not interpreted in any way right now. Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/if_xdp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 8d48863472b9..2ecf79282c26 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { -- cgit v1.2.3 From 48eb03dd26304c24f03bdbb9382e89c8564e71df Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:08 -0800 Subject: xsk: Add TX timestamp and TX checksum offload support This change actually defines the (initial) metadata layout that should be used by AF_XDP userspace (xsk_tx_metadata). The first field is flags which requests appropriate offloads, followed by the offload-specific fields. The supported per-device offloads are exported via netlink (new xsk-flags). The offloads themselves are still implemented in a bit of a framework-y fashion that's left from my initial kfunc attempt. I'm introducing new xsk_tx_metadata_ops which drivers are supposed to implement. The drivers are also supposed to call xsk_tx_metadata_request/xsk_tx_metadata_complete in the right places. Since xsk_tx_metadata_{request,_complete} are static inline, we don't incur any extra overhead doing indirect calls. The benefit of this scheme is as follows: - keeps all metadata layout parsing away from driver code - makes it easy to grep and see which drivers implement what - don't need any extra flags to maintain to keep track of what offloads are implemented; if the callback is implemented - the offload is supported (used by netlink reporting code) Two offloads are defined right now: 1. XDP_TXMD_FLAGS_CHECKSUM: skb-style csum_start+csum_offset 2. XDP_TXMD_FLAGS_TIMESTAMP: writes TX timestamp back into metadata area upon completion (tx_timestamp field) XDP_TXMD_FLAGS_TIMESTAMP is also implemented for XDP_COPY mode: it writes SW timestamp from the skb destructor (note I'm reusing hwtstamps to pass metadata pointer). The struct is forward-compatible and can be extended in the future by appending more fields. Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-3-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/if_xdp.h | 38 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/netdev.h | 16 ++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 2ecf79282c26..95de66d5a26c 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of struct + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -122,4 +157,7 @@ struct xdp_desc { */ #define XDP_PKT_CONTD (1 << 0) +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) + #endif /* _LINUX_IF_XDP_H */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) -- cgit v1.2.3 From 11614723af26e7c32fcb704d8f30fdf60c1122dc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:14 -0800 Subject: xsk: Add option to calculate TX checksum in SW For XDP_COPY mode, add a UMEM option XDP_UMEM_TX_SW_CSUM to call skb_checksum_help in transmit path. Might be useful to debugging issues with real hardware. I also use this mode in the selftests. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-9-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/if_xdp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 95de66d5a26c..d31698410410 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; -- cgit v1.2.3 From 6ebf6f90ab4ac09a76172a6d387e8819d3259595 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:45 -0800 Subject: mptcp: add mptcpi_subflows_total counter If the initial subflow has been removed, we cannot know without checking other counters, e.g. ss -ti | grep -c tcp-ulp-mptcp or getsockopt(SOL_MPTCP, MPTCP_FULL_INFO, ...) (or others except MPTCP_INFO of course) and then check mptcp_subflow_data->num_subflows to get the total amount of subflows. This patch adds a new counter mptcpi_subflows_total in mptcpi_flags to store the total amount of subflows, including the initial one. A new helper __mptcp_has_initial_subflow() is added to check whether the initial subflow has been removed or not. With this helper, we can then compute the total amount of subflows from mptcp_info by doing something like: mptcpi_subflows_total = mptcpi_subflows + __mptcp_has_initial_subflow(msk). Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/428 Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-1-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index a6451561f3f8..74cfe496891e 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -57,6 +57,7 @@ struct mptcp_info { __u64 mptcpi_bytes_sent; __u64 mptcpi_bytes_received; __u64 mptcpi_bytes_acked; + __u8 mptcpi_subflows_total; }; /* MPTCP Reset reason codes, rfc8684 */ -- cgit v1.2.3 From c55e0a55b165202f18cbc4a20650d2e1becd5507 Mon Sep 17 00:00:00 2001 From: Tyler Fanelli Date: Tue, 19 Sep 2023 22:40:00 -0400 Subject: fuse: Rename DIRECT_IO_RELAX to DIRECT_IO_ALLOW_MMAP Although DIRECT_IO_RELAX's initial usage is to allow shared mmap, its description indicates a purpose of reducing memory footprint. This may imply that it could be further used to relax other DIRECT_IO operations in the future. Replace it with a flag DIRECT_IO_ALLOW_MMAP which does only one thing, allow shared mmap of DIRECT_IO files while still bypassing the cache on regular reads and writes. [Miklos] Also Keep DIRECT_IO_RELAX definition for backward compatibility. Signed-off-by: Tyler Fanelli Fixes: e78662e818f9 ("fuse: add a new fuse init flag to relax restrictions in no cache mode") Cc: # v6.6 Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index db92a7202b34..e7418d15fe39 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -209,7 +209,7 @@ * - add FUSE_HAS_EXPIRE_ONLY * * 7.39 - * - add FUSE_DIRECT_IO_RELAX + * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures */ @@ -409,8 +409,7 @@ struct fuse_file_lock { * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation - * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now - * allow shared mmap + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -449,7 +448,10 @@ struct fuse_file_lock { #define FUSE_HAS_INODE_DAX (1ULL << 33) #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) -#define FUSE_DIRECT_IO_RELAX (1ULL << 36) +#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) + +/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ +#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP /** * CUSE INIT request/reply flags -- cgit v1.2.3 From 0d9e32a8075a6cccbab294f98ccf7d33821d9b1c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 24 Nov 2023 16:26:24 +0200 Subject: media: uapi: Add controls for the THP7312 ISP The THP7312 is an external ISP from THine. As such, it implements a large number of parameters to control all aspects of the image processing. Many of those controls are already standard in V4L2, but some are fairly device-specific. Reserve a range of 32 controls for the device. The driver will implement 4 device-specific controls to start with, define and document them. 28 additional device-specific controls should be enough for future development. Co-developed-by: Paul Elder Signed-off-by: Paul Elder Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/thp7312.h | 19 +++++++++++++++++++ include/uapi/linux/v4l2-controls.h | 6 ++++++ 2 files changed, 25 insertions(+) create mode 100644 include/uapi/linux/thp7312.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/thp7312.h b/include/uapi/linux/thp7312.h new file mode 100644 index 000000000000..2b629e05daf9 --- /dev/null +++ b/include/uapi/linux/thp7312.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * THine THP7312 user space header file. + * + * Copyright (C) 2021 THine Electronics, Inc. + * Copyright (C) 2023 Ideas on Board Oy + */ + +#ifndef __UAPI_THP7312_H_ +#define __UAPI_THP7312_H_ + +#include + +#define V4L2_CID_THP7312_LOW_LIGHT_COMPENSATION (V4L2_CID_USER_THP7312_BASE + 0x01) +#define V4L2_CID_THP7312_AUTO_FOCUS_METHOD (V4L2_CID_USER_THP7312_BASE + 0x02) +#define V4L2_CID_THP7312_NOISE_REDUCTION_AUTO (V4L2_CID_USER_THP7312_BASE + 0x03) +#define V4L2_CID_THP7312_NOISE_REDUCTION_ABSOLUTE (V4L2_CID_USER_THP7312_BASE + 0x04) + +#endif /* __UAPI_THP7312_H_ */ diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 68db66d4aae8..99c3f5e99da7 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -209,6 +209,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_NPCM_BASE (V4L2_CID_USER_BASE + 0x11b0) +/* + * The base for THine THP7312 driver controls. + * We reserve 32 controls for this driver. + */ +#define V4L2_CID_USER_THP7312_BASE (V4L2_CID_USER_BASE + 0x11c0) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From 2112f3a28e8d9d1e2faaa32d124b450bde055b72 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 23 Oct 2023 21:19:22 +0300 Subject: media: v4l2-subdev: Fix indentation in v4l2-subdev.h Fix a simple indentation issue in the v4l2-subdev.h header. Fixes: f57fa2959244 ("media: v4l2-subdev: Add new ioctl for client capabilities") Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-subdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index b383c2fe0cf3..f0fbb4a7c150 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -239,7 +239,7 @@ struct v4l2_subdev_routing { * set (which is the default), the 'stream' fields will be forced to 0 by the * kernel. */ - #define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1ULL << 0) +#define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1ULL << 0) /** * struct v4l2_subdev_client_capability - Capabilities of the client accessing -- cgit v1.2.3 From b89710bd215e650f0aaf8ffe7104413d46d44392 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 27 Nov 2023 18:34:28 +0100 Subject: iio: add modifiers for A and B ultraviolet light Currently there are only two modifiers for ultraviolet light: a generic one for any ultraviolet light (IIO_MOD_LIGHT_UV) and one for deep ultraviolet (IIO_MOD_LIGHT_DUV), which is also referred as ultraviolet C (UV-C) band and covers short-wave ultraviolet. There are still no modifiers for the long-wave and medium-wave ultraviolet bands. These two bands are the main components used to obtain the UV index on the Earth's surface. Add modifiers for the ultraviolet A (UV-A) and ultraviolet B (UV-B) bands. Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20231110-veml6075-v3-1-6ee46775b422@gmail.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 9c2ffdcd6623..5060963707b1 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -91,6 +91,8 @@ enum iio_modifier { IIO_MOD_CO2, IIO_MOD_VOC, IIO_MOD_LIGHT_UV, + IIO_MOD_LIGHT_UVA, + IIO_MOD_LIGHT_UVB, IIO_MOD_LIGHT_DUV, IIO_MOD_PM1, IIO_MOD_PM2P5, -- cgit v1.2.3 From 2202844e4468c7539dba0c0b06577c93735af952 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Mon, 6 Nov 2023 15:22:23 +0800 Subject: vfio/migration: Add debugfs to live migration driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are multiple devices, software and operational steps involved in the process of live migration. An error occurred on any node may cause the live migration operation to fail. This complex process makes it very difficult to locate and analyze the cause when the function fails. In order to quickly locate the cause of the problem when the live migration fails, I added a set of debugfs to the vfio live migration driver. +-------------------------------------------+ | | | | | QEMU | | | | | +---+----------------------------+----------+ | ^ | ^ | | | | | | | | v | v | +---------+--+ +---------+--+ |src vfio_dev| |dst vfio_dev| +--+---------+ +--+---------+ | ^ | ^ | | | | v | | | +-----------+----+ +-----------+----+ |src dev debugfs | |dst dev debugfs | +----------------+ +----------------+ The entire debugfs directory will be based on the definition of the CONFIG_DEBUG_FS macro. If this macro is not enabled, the interfaces in vfio.h will be empty definitions, and the creation and initialization of the debugfs directory will not be executed. vfio | +--- | +---migration | +--state | +--- +---migration +--state debugfs will create a public root directory "vfio" file. then create a dev_name() file for each live migration device. First, create a unified state acquisition file of "migration" in this device directory. Then, create a public live migration state lookup file "state". Signed-off-by: Longfang Liu Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/r/20231106072225.28577-2-liulongfang@huawei.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 7f5fb010226d..2b68e6cdf190 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1219,6 +1219,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, VFIO_DEVICE_STATE_PRE_COPY = 6, VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, + VFIO_DEVICE_STATE_NR, }; /** -- cgit v1.2.3 From 91051f003948432f83b5d2766eeb83b2b4993649 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 1 Dec 2023 15:49:52 +0100 Subject: tcp: Dump bound-only sockets in inet_diag. Walk the hashinfo->bhash2 table so that inet_diag can dump TCP sockets that are bound but haven't yet called connect() or listen(). The code is inspired by the ->lhash2 loop. However there's no manual test of the source port, since this kind of filtering is already handled by inet_diag_bc_sk(). Also, a maximum of 16 sockets are dumped at a time, to avoid running with bh disabled for too long. There's no TCP state for bound but otherwise inactive sockets. Such sockets normally map to TCP_CLOSE. However, "ss -l", which is supposed to only dump listening sockets, actually requests the kernel to dump sockets in either the TCP_LISTEN or TCP_CLOSE states. To avoid dumping bound-only sockets with "ss -l", we therefore need to define a new pseudo-state (TCP_BOUND_INACTIVE) that user space will be able to set explicitly. With an IPv4, an IPv6 and an IPv6-only socket, bound respectively to 40000, 64000, 60000, an updated version of iproute2 could work as follow: $ ss -t state bound-inactive Recv-Q Send-Q Local Address:Port Peer Address:Port Process 0 0 0.0.0.0:40000 0.0.0.0:* 0 0 [::]:60000 [::]:* 0 0 *:64000 *:* Reviewed-by: Eric Dumazet Signed-off-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/b3a84ae61e19c06806eea9c602b3b66e8f0cfc81.1701362867.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e88746ba7d21..b1e8c5bdfc82 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6902,6 +6902,7 @@ enum { BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ BPF_TCP_NEW_SYN_RECV, + BPF_TCP_BOUND_INACTIVE, BPF_TCP_MAX_STATES /* Leave at the end! */ }; -- cgit v1.2.3 From bc877956272f0521fef107838555817112a450dc Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:29 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for queue Add support in netlink spec(netdev.yaml) for queue information. Add code generated from the spec. Note: The "queue-type" attribute takes values 0 and 1 for rx and tx queue type respectively. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147330963.5260.2576294626647300472.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From ff9991499fb53575c45eb92cd064bcd7141bb572 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:51 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for NAPI Add support in netlink spec(netdev.yaml) for napi related information. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147333119.5260.7050639053080529108.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 5a5131d66fe02337de0b1b2e021b58f0f55c6df5 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:02 -0800 Subject: netdev-genl: spec: Add irq in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for interrupt number among the NAPI attributes. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147334210.5260.18178387869057516983.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 8481a249a0eaf0000dbb18f7689ccd50ea9835cd Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:13 -0800 Subject: netdev-genl: spec: Add PID in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for PID of the NAPI thread. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147335301.5260.11872351477120434501.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 8ebe06611666a399162de31cdd6f2f48ffa87748 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:42 +0800 Subject: net: bridge: add document for IFLA_BR enum Add document for IFLA_BR enum so we can use it in Documentation/networking/bridge.rst. Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 280 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8181ef23a7a2..a5f873c85a72 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -461,6 +461,286 @@ enum in6_addr_gen_mode { /* Bridge section */ +/** + * DOC: Bridge enum definition + * + * Please *note* that the timer values in the following section are expected + * in clock_t format, which is seconds multiplied by USER_HZ (generally + * defined as 100). + * + * @IFLA_BR_FORWARD_DELAY + * The bridge forwarding delay is the time spent in LISTENING state + * (before moving to LEARNING) and in LEARNING state (before moving + * to FORWARDING). Only relevant if STP is enabled. + * + * The valid values are between (2 * USER_HZ) and (30 * USER_HZ). + * The default value is (15 * USER_HZ). + * + * @IFLA_BR_HELLO_TIME + * The time between hello packets sent by the bridge, when it is a root + * bridge or a designated bridge. Only relevant if STP is enabled. + * + * The valid values are between (1 * USER_HZ) and (10 * USER_HZ). + * The default value is (2 * USER_HZ). + * + * @IFLA_BR_MAX_AGE + * The hello packet timeout is the time until another bridge in the + * spanning tree is assumed to be dead, after reception of its last hello + * message. Only relevant if STP is enabled. + * + * The valid values are between (6 * USER_HZ) and (40 * USER_HZ). + * The default value is (20 * USER_HZ). + * + * @IFLA_BR_AGEING_TIME + * Configure the bridge's FDB entries aging time. It is the time a MAC + * address will be kept in the FDB after a packet has been received from + * that address. After this time has passed, entries are cleaned up. + * Allow values outside the 802.1 standard specification for special cases: + * + * * 0 - entry never ages (all permanent) + * * 1 - entry disappears (no persistence) + * + * The default value is (300 * USER_HZ). + * + * @IFLA_BR_STP_STATE + * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off + * (*IFLA_BR_STP_STATE* == 0) for this bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_PRIORITY + * Set this bridge's spanning tree priority, used during STP root bridge + * election. + * + * The valid values are between 0 and 65535. + * + * @IFLA_BR_VLAN_FILTERING + * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off + * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not + * consider the VLAN tag when handling packets. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_VLAN_PROTOCOL + * Set the protocol used for VLAN filtering. + * + * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value + * is 0x8100(802.1Q). + * + * @IFLA_BR_GROUP_FWD_MASK + * The group forwarding mask. This is the bitmask that is applied to + * decide whether to forward incoming frames destined to link-local + * addresses (of the form 01:80:C2:00:00:0X). + * + * The default value is 0, which means the bridge does not forward any + * link-local frames coming on this port. + * + * @IFLA_BR_ROOT_ID + * The bridge root id, read only. + * + * @IFLA_BR_BRIDGE_ID + * The bridge id, read only. + * + * @IFLA_BR_ROOT_PORT + * The bridge root port, read only. + * + * @IFLA_BR_ROOT_PATH_COST + * The bridge root path cost, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE + * The bridge topology change, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED + * The bridge topology change detected, read only. + * + * @IFLA_BR_HELLO_TIMER + * The bridge hello timer, read only. + * + * @IFLA_BR_TCN_TIMER + * The bridge tcn timer, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE_TIMER + * The bridge topology change timer, read only. + * + * @IFLA_BR_GC_TIMER + * The bridge gc timer, read only. + * + * @IFLA_BR_GROUP_ADDR + * Set the MAC address of the multicast group this bridge uses for STP. + * The address must be a link-local address in standard Ethernet MAC address + * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f]. + * + * The default value is 0. + * + * @IFLA_BR_FDB_FLUSH + * Flush bridge's fdb dynamic entries. + * + * @IFLA_BR_MCAST_ROUTER + * Set bridge's multicast router if IGMP snooping is enabled. + * The valid values are: + * + * * 0 - disabled. + * * 1 - automatic (queried). + * * 2 - permanently enabled. + * + * The default value is 1. + * + * @IFLA_BR_MCAST_SNOOPING + * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off + * (*IFLA_BR_MCAST_SNOOPING* == 0). + * + * The default value is 1. + * + * @IFLA_BR_MCAST_QUERY_USE_IFADDR + * If enabled use the bridge's own IP address as source address for IGMP + * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0 + * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0). + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_QUERIER + * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable + * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast + * queries by the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_HASH_ELASTICITY + * Set multicast database hash elasticity, It is the maximum chain length in + * the multicast hash table. This attribute is *deprecated* and the value + * is always 16. + * + * @IFLA_BR_MCAST_HASH_MAX + * Set maximum size of the multicast hash table + * + * The default value is 4096, the value must be a power of 2. + * + * @IFLA_BR_MCAST_LAST_MEMBER_CNT + * The Last Member Query Count is the number of Group-Specific Queries + * sent before the router assumes there are no local members. The Last + * Member Query Count is also the number of Group-and-Source-Specific + * Queries sent before the router assumes there are no listeners for a + * particular source. + * + * The default value is 2. + * + * @IFLA_BR_MCAST_STARTUP_QUERY_CNT + * The Startup Query Count is the number of Queries sent out on startup, + * separated by the Startup Query Interval. + * + * The default value is 2. + * + * @IFLA_BR_MCAST_LAST_MEMBER_INTVL + * The Last Member Query Interval is the Max Response Time inserted into + * Group-Specific Queries sent in response to Leave Group messages, and + * is also the amount of time between Group-Specific Query messages. + * + * The default value is (1 * USER_HZ). + * + * @IFLA_BR_MCAST_MEMBERSHIP_INTVL + * The interval after which the bridge will leave a group, if no membership + * reports for this group are received. + * + * The default value is (260 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERIER_INTVL + * The interval between queries sent by other routers. if no queries are + * seen after this delay has passed, the bridge will start to send its own + * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled). + * + * The default value is (255 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERY_INTVL + * The Query Interval is the interval between General Queries sent by + * the Querier. + * + * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL + * The Max Response Time used to calculate the Max Resp Code inserted + * into the periodic General Queries. + * + * The default value is (10 * USER_HZ). + * + * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL + * The interval between queries in the startup phase. + * + * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ). + * + * @IFLA_BR_NF_CALL_IPTABLES + * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0) + * iptables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_NF_CALL_IP6TABLES + * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0) + * ip6tables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_NF_CALL_ARPTABLES + * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0) + * arptables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_VLAN_DEFAULT_PVID + * VLAN ID applied to untagged and priority-tagged incoming packets. + * + * The default value is 1. Setting to the special value 0 makes all ports of + * this bridge not have a PVID by default, which means that they will + * not accept VLAN-untagged traffic. + * + * @IFLA_BR_PAD + * Bridge attribute padding type for netlink message. + * + * @IFLA_BR_VLAN_STATS_ENABLED + * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable + * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_STATS_ENABLED + * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable + * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats + * accounting. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_IGMP_VERSION + * Set the IGMP version. + * + * The valid values are 2 and 3. The default value is 2. + * + * @IFLA_BR_MCAST_MLD_VERSION + * Set the MLD version. + * + * The valid values are 1 and 2. The default value is 1. + * + * @IFLA_BR_VLAN_STATS_PER_PORT + * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable + * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting. + * Can be changed only when there are no port VLANs configured. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MULTI_BOOLOPT + * The multi_boolopt is used to control new boolean options to avoid adding + * new netlink attributes. You can look at ``enum br_boolopt_id`` for those + * options. + * + * @IFLA_BR_MCAST_QUERIER_STATE + * Bridge mcast querier states, read only. + * + * @IFLA_BR_FDB_N_LEARNED + * The number of dynamically learned FDB entries for the current bridge, + * read only. + * + * @IFLA_BR_FDB_MAX_LEARNED + * Set the number of max dynamically learned FDB entries for the current + * bridge. + */ enum { IFLA_BR_UNSPEC, IFLA_BR_FORWARD_DELAY, -- cgit v1.2.3 From 8c4bafdb01cc7809903aced4981f563e3708ea37 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:43 +0800 Subject: net: bridge: add document for IFLA_BRPORT enum Add document for IFLA_BRPORT enum so we can use it in Documentation/networking/bridge.rst. Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 241 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a5f873c85a72..ab9bcff96e4d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -802,11 +802,252 @@ struct ifla_bridge_id { __u8 addr[6]; /* ETH_ALEN */ }; +/** + * DOC: Bridge mode enum definition + * + * @BRIDGE_MODE_HAIRPIN + * Controls whether traffic may be sent back out of the port on which it + * was received. This option is also called reflective relay mode, and is + * used to support basic VEPA (Virtual Ethernet Port Aggregator) + * capabilities. By default, this flag is turned off and the bridge will + * not forward traffic back out of the receiving port. + */ enum { BRIDGE_MODE_UNSPEC, BRIDGE_MODE_HAIRPIN, }; +/** + * DOC: Bridge port enum definition + * + * @IFLA_BRPORT_STATE + * The operation state of the port. Here are the valid values. + * + * * 0 - port is in STP *DISABLED* state. Make this port completely + * inactive for STP. This is also called BPDU filter and could be used + * to disable STP on an untrusted port, like a leaf virtual device. + * The traffic forwarding is also stopped on this port. + * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled + * on the bridge. In this state the port listens for STP BPDUs and + * drops all other traffic frames. + * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on + * the bridge. In this state the port will accept traffic only for the + * purpose of updating MAC address tables. + * * 3 - port is in STP *FORWARDING* state. Port is fully active. + * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on + * the bridge. This state is used during the STP election process. + * In this state, port will only process STP BPDUs. + * + * @IFLA_BRPORT_PRIORITY + * The STP port priority. The valid values are between 0 and 255. + * + * @IFLA_BRPORT_COST + * The STP path cost of the port. The valid values are between 1 and 65535. + * + * @IFLA_BRPORT_MODE + * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details. + * + * @IFLA_BRPORT_GUARD + * Controls whether STP BPDUs will be processed by the bridge port. By + * default, the flag is turned off to allow BPDU processing. Turning this + * flag on will disable the bridge port if a STP BPDU packet is received. + * + * If the bridge has Spanning Tree enabled, hostile devices on the network + * may send BPDU on a port and cause network failure. Setting *guard on* + * will detect and stop this by disabling the port. The port will be + * restarted if the link is brought down, or removed and reattached. + * + * @IFLA_BRPORT_PROTECT + * Controls whether a given port is allowed to become a root port or not. + * Only used when STP is enabled on the bridge. By default the flag is off. + * + * This feature is also called root port guard. If BPDU is received from a + * leaf (edge) port, it should not be elected as root port. This could + * be used if using STP on a bridge and the downstream bridges are not fully + * trusted; this prevents a hostile guest from rerouting traffic. + * + * @IFLA_BRPORT_FAST_LEAVE + * This flag allows the bridge to immediately stop multicast traffic + * forwarding on a port that receives an IGMP Leave message. It is only used + * when IGMP snooping is enabled on the bridge. By default the flag is off. + * + * @IFLA_BRPORT_LEARNING + * Controls whether a given port will learn *source* MAC addresses from + * received traffic or not. Also controls whether dynamic FDB entries + * (which can also be added by software) will be refreshed by incoming + * traffic. By default this flag is on. + * + * @IFLA_BRPORT_UNICAST_FLOOD + * Controls whether unicast traffic for which there is no FDB entry will + * be flooded towards this port. By default this flag is on. + * + * @IFLA_BRPORT_PROXYARP + * Enable proxy ARP on this port. + * + * @IFLA_BRPORT_LEARNING_SYNC + * Controls whether a given port will sync MAC addresses learned on device + * port to bridge FDB. + * + * @IFLA_BRPORT_PROXYARP_WIFI + * Enable proxy ARP on this port which meets extended requirements by + * IEEE 802.11 and Hotspot 2.0 specifications. + * + * @IFLA_BRPORT_ROOT_ID + * + * @IFLA_BRPORT_BRIDGE_ID + * + * @IFLA_BRPORT_DESIGNATED_PORT + * + * @IFLA_BRPORT_DESIGNATED_COST + * + * @IFLA_BRPORT_ID + * + * @IFLA_BRPORT_NO + * + * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK + * + * @IFLA_BRPORT_CONFIG_PENDING + * + * @IFLA_BRPORT_MESSAGE_AGE_TIMER + * + * @IFLA_BRPORT_FORWARD_DELAY_TIMER + * + * @IFLA_BRPORT_HOLD_TIMER + * + * @IFLA_BRPORT_FLUSH + * Flush bridge ports' fdb dynamic entries. + * + * @IFLA_BRPORT_MULTICAST_ROUTER + * Configure the port's multicast router presence. A port with + * a multicast router will receive all multicast traffic. + * The valid values are: + * + * * 0 disable multicast routers on this port + * * 1 let the system detect the presence of routers (default) + * * 2 permanently enable multicast traffic forwarding on this port + * * 3 enable multicast routers temporarily on this port, not depending + * on incoming queries. + * + * @IFLA_BRPORT_PAD + * + * @IFLA_BRPORT_MCAST_FLOOD + * Controls whether a given port will flood multicast traffic for which + * there is no MDB entry. By default this flag is on. + * + * @IFLA_BRPORT_MCAST_TO_UCAST + * Controls whether a given port will replicate packets using unicast + * instead of multicast. By default this flag is off. + * + * This is done by copying the packet per host and changing the multicast + * destination MAC to a unicast one accordingly. + * + * *mcast_to_unicast* works on top of the multicast snooping feature of the + * bridge. Which means unicast copies are only delivered to hosts which + * are interested in unicast and signaled this via IGMP/MLD reports previously. + * + * This feature is intended for interface types which have a more reliable + * and/or efficient way to deliver unicast packets than broadcast ones + * (e.g. WiFi). + * + * However, it should only be enabled on interfaces where no IGMPv2/MLDv1 + * report suppression takes place. IGMP/MLD report suppression issue is + * usually overcome by the network daemon (supplicant) enabling AP isolation + * and by that separating all STAs. + * + * Delivery of STA-to-STA IP multicast is made possible again by enabling + * and utilizing the bridge hairpin mode, which considers the incoming port + * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option). + * Hairpin mode is performed after multicast snooping, therefore leading + * to only deliver reports to STAs running a multicast router. + * + * @IFLA_BRPORT_VLAN_TUNNEL + * Controls whether vlan to tunnel mapping is enabled on the port. + * By default this flag is off. + * + * @IFLA_BRPORT_BCAST_FLOOD + * Controls flooding of broadcast traffic on the given port. By default + * this flag is on. + * + * @IFLA_BRPORT_GROUP_FWD_MASK + * Set the group forward mask. This is a bitmask that is applied to + * decide whether to forward incoming frames destined to link-local + * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults + * to 0, which means the bridge does not forward any link-local frames + * coming on this port). + * + * @IFLA_BRPORT_NEIGH_SUPPRESS + * Controls whether neighbor discovery (arp and nd) proxy and suppression + * is enabled on the port. By default this flag is off. + * + * @IFLA_BRPORT_ISOLATED + * Controls whether a given port will be isolated, which means it will be + * able to communicate with non-isolated ports only. By default this + * flag is off. + * + * @IFLA_BRPORT_BACKUP_PORT + * Set a backup port. If the port loses carrier all traffic will be + * redirected to the configured backup port. Set the value to 0 to disable + * it. + * + * @IFLA_BRPORT_MRP_RING_OPEN + * + * @IFLA_BRPORT_MRP_IN_OPEN + * + * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + * The number of per-port EHT hosts limit. The default value is 512. + * Setting to 0 is not allowed. + * + * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT + * The current number of tracked hosts, read only. + * + * @IFLA_BRPORT_LOCKED + * Controls whether a port will be locked, meaning that hosts behind the + * port will not be able to communicate through the port unless an FDB + * entry with the unit's MAC address is in the FDB. The common use case is + * that hosts are allowed access through authentication with the IEEE 802.1X + * protocol or based on whitelists. By default this flag is off. + * + * Please note that secure 802.1X deployments should always use the + * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its + * FDB based on link-local (EAPOL) traffic received on the port. + * + * @IFLA_BRPORT_MAB + * Controls whether a port will use MAC Authentication Bypass (MAB), a + * technique through which select MAC addresses may be allowed on a locked + * port, without using 802.1X authentication. Packets with an unknown source + * MAC address generates a "locked" FDB entry on the incoming bridge port. + * The common use case is for user space to react to these bridge FDB + * notifications and optionally replace the locked FDB entry with a normal + * one, allowing traffic to pass for whitelisted MAC addresses. + * + * Setting this flag also requires *IFLA_BRPORT_LOCKED* and + * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized + * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic + * FDB entries installed by user space (as replacements for the locked FDB + * entries) to be refreshed and/or aged out. + * + * @IFLA_BRPORT_MCAST_N_GROUPS + * + * @IFLA_BRPORT_MCAST_MAX_GROUPS + * Sets the maximum number of MDB entries that can be registered for a + * given port. Attempts to register more MDB entries at the port than this + * limit allows will be rejected, whether they are done through netlink + * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a + * limit of 0 disables the limit. The default value is 0. + * + * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS + * Controls whether neighbor discovery (arp and nd) proxy and suppression is + * enabled for a given port. By default this flag is off. + * + * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS* + * is enabled for a given port. + * + * @IFLA_BRPORT_BACKUP_NHID + * The FDB nexthop object ID to attach to packets being redirected to a + * backup port that has VLAN tunnel mapping enabled (via the + * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has + * the effect of not attaching any ID. + */ enum { IFLA_BRPORT_UNSPEC, IFLA_BRPORT_STATE, /* Spanning tree state */ -- cgit v1.2.3 From 4527358b76861dfd64ee34aba45d81648fbc8a61 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:15 -0800 Subject: bpf: introduce BPF token object Add new kind of BPF kernel object, BPF token. BPF token is meant to allow delegating privileged BPF functionality, like loading a BPF program or creating a BPF map, from privileged process to a *trusted* unprivileged process, all while having a good amount of control over which privileged operations could be performed using provided BPF token. This is achieved through mounting BPF FS instance with extra delegation mount options, which determine what operations are delegatable, and also constraining it to the owning user namespace (as mentioned in the previous patch). BPF token itself is just a derivative from BPF FS and can be created through a new bpf() syscall command, BPF_TOKEN_CREATE, which accepts BPF FS FD, which can be attained through open() API by opening BPF FS mount point. Currently, BPF token "inherits" delegated command, map types, prog type, and attach type bit sets from BPF FS as is. In the future, having an BPF token as a separate object with its own FD, we can allow to further restrict BPF token's allowable set of things either at the creation time or after the fact, allowing the process to guard itself further from unintentionally trying to load undesired kind of BPF programs. But for now we keep things simple and just copy bit sets as is. When BPF token is created from BPF FS mount, we take reference to the BPF super block's owning user namespace, and then use that namespace for checking all the {CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN} capabilities that are normally only checked against init userns (using capable()), but now we check them using ns_capable() instead (if BPF token is provided). See bpf_token_capable() for details. Such setup means that BPF token in itself is not sufficient to grant BPF functionality. User namespaced process has to *also* have necessary combination of capabilities inside that user namespace. So while previously CAP_BPF was useless when granted within user namespace, now it gains a meaning and allows container managers and sys admins to have a flexible control over which processes can and need to use BPF functionality within the user namespace (i.e., container in practice). And BPF FS delegation mount options and derived BPF tokens serve as a per-container "flag" to grant overall ability to use bpf() (plus further restrict on which parts of bpf() syscalls are treated as namespaced). Note also, BPF_TOKEN_CREATE command itself requires ns_capable(CAP_BPF) within the BPF FS owning user namespace, rounding up the ns_capable() story of BPF token. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e88746ba7d21..d4a567e5bc3c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1712,6 +1744,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 688b7270b3cb75e8ac78123d719967db40336e5b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:16 -0800 Subject: bpf: add BPF token support to BPF_MAP_CREATE command Allow providing token_fd for BPF_MAP_CREATE command to allow controlled BPF map creation from unprivileged process through delegated BPF token. Wire through a set of allowed BPF map types to BPF token, derived from BPF FS at BPF token creation time. This, in combination with allowed_cmds allows to create a narrowly-focused BPF token (controlled by privileged agent) with a restrictive set of BPF maps that application can attempt to create. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d4a567e5bc3c..0bba3392b17a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1433,6 +1434,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit v1.2.3 From ee54b1a910e4d49c9a104f31ae3f5b979131adf8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:17 -0800 Subject: bpf: add BPF token support to BPF_BTF_LOAD command Accept BPF token FD in BPF_BTF_LOAD command to allow BTF data loading through delegated BPF token. BTF loading is a pretty straightforward operation, so as long as BPF token is created with allow_cmds granting BPF_BTF_LOAD command, kernel proceeds to parsing BTF data and creating BTF object. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0bba3392b17a..9f9989e0d062 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1616,6 +1616,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { -- cgit v1.2.3 From e1cef620f598853a90f17701fcb1057a6768f7b8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:18 -0800 Subject: bpf: add BPF token support to BPF_PROG_LOAD command Add basic support of BPF token to BPF_PROG_LOAD. Wire through a set of allowed BPF program types and attach types, derived from BPF FS at BPF token creation time. Then make sure we perform bpf_token_capable() checks everywhere where it's relevant. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9f9989e0d062..4df2d025c784 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1504,6 +1505,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 7065eefb38f16c91e9ace36fb7c873e4c9857c27 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Dec 2023 11:09:20 -0800 Subject: bpf: rename MAX_BPF_LINK_TYPE into __MAX_BPF_LINK_TYPE for consistency To stay consistent with the naming pattern used for similar cases in BPF UAPI (__MAX_BPF_ATTACH_TYPE, etc), rename MAX_BPF_LINK_TYPE into __MAX_BPF_LINK_TYPE. Also similar to MAX_BPF_ATTACH_TYPE and MAX_BPF_REG, add: #define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE Not all __MAX_xxx enums have such #define, so I'm not sure if we should add it or not, but I figured I'll start with a completely backwards compatible way, and we can drop that, if necessary. Also adjust a selftest that used MAX_BPF_LINK_TYPE enum. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206190920.1651226-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4df2d025c784..e0545201b55f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1108,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, -- cgit v1.2.3 From cb46fca88d14939da2785567253d0a297f31be27 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 29 Aug 2023 08:20:14 -0700 Subject: cxl: Add Support for Get Timestamp Add the call to the UAPI such that userspace may corelate the timestamps from the device log with system wall time, if, for example there's any sort of inaccuracy or skew in the device. Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230829152014.15452-1-dave@stgolabs.net Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 14bc6e742148..42066f4eb890 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -46,6 +46,7 @@ ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ ___DEPRECATED(SCAN_MEDIA, "Scan Media"), \ ___DEPRECATED(GET_SCAN_MEDIA, "Get Scan Media Results"), \ + ___C(GET_TIMESTAMP, "Get Timestamp"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From 6d72283526090850274d065cd5d60af732cc5fc8 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 2 Nov 2023 16:21:28 +0000 Subject: KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT Unless explicitly told to do so (by passing 'clocksource=tsc' and 'tsc=stable:socket', and then jumping through some hoops concerning potential CPU hotplug) Xen will never use TSC as its clocksource. Hence, by default, a Xen guest will not see PVCLOCK_TSC_STABLE_BIT set in either the primary or secondary pvclock memory areas. This has led to bugs in some guest kernels which only become evident if PVCLOCK_TSC_STABLE_BIT *is* set in the pvclocks. Hence, to support such guests, give the VMM a new Xen HVM config flag to tell KVM to forcibly clear the bit in the Xen pvclocks. Signed-off-by: Paul Durrant Reviewed-by: David Woodhouse Link: https://lore.kernel.org/r/20231102162128.2353459-1-paul@xen.org Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e9cb2df67a1d..175420b26e36 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1318,6 +1318,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) struct kvm_xen_hvm_config { __u32 flags; -- cgit v1.2.3 From a5d3df8ae13fada772fbce952e9ee7b3433dba16 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Nov 2023 10:34:03 +0100 Subject: KVM: remove deprecated UAPIs The deprecated interfaces were removed 15 years ago. KVM's device assignment was deprecated in 4.2 and removed 6.5 years ago; the only interest might be in compiling ancient versions of QEMU, but QEMU has been using its own imported copy of the kernel headers since June 2011. So again we go into archaeology territory; just remove the cruft. Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 90 ------------------------------------------------ 1 file changed, 90 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e9cb2df67a1d..b1f92a0edc35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -16,76 +16,6 @@ #define KVM_API_VERSION 12 -/* *** Deprecated interfaces *** */ - -#define KVM_TRC_SHIFT 16 - -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) - -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - -struct kvm_user_trace_setup { - __u32 buf_size; - __u32 buf_nr; -}; - -#define __KVM_DEPRECATED_MAIN_W_0x06 \ - _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) -#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) - -#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) - -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) - -/* *** End of deprecated interfaces *** */ - - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -967,9 +897,6 @@ struct kvm_ppc_resize_hpt { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 -#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 -#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) @@ -1536,20 +1463,8 @@ struct kvm_s390_ucas_mapping { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) -/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ - struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ - struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) @@ -1566,9 +1481,6 @@ struct kvm_s390_ucas_mapping { * KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) -/* Available with KVM_CAP_PCI_2_3 */ -#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ - struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ @@ -1621,8 +1533,6 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) -- cgit v1.2.3 From e6a9a2cbc13bf43e4c03f57666e93d511249d5d7 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 6 Nov 2023 14:09:58 -0800 Subject: fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PAGEMAP_SCAN ioctl returns information regarding page table entries. It is more efficient compared to reading pagemap files. CRIU can start to utilize this ioctl, but it needs info about soft-dirty bits to track memory changes. We are aware of a new method for tracking memory changes implemented in the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method is its usability by unprivileged users. However, it is not feasible to transparently replace the soft-dirty tracker with the new one. The main problem here is userfault descriptors that have to be preserved between pre-dump iterations. It means criu continues supporting the soft-dirty method to avoid breakage for current users. The new method will be implemented as a separate feature. [avagin@google.com: update tools/include/uapi/linux/fs.h] Link: https://lkml.kernel.org/r/20231107164139.576046-1-avagin@google.com Link: https://lkml.kernel.org/r/20231106220959.296568-1-avagin@google.com Signed-off-by: Andrei Vagin Reviewed-by: Muhammad Usama Anjum Cc: Michał Mirosław Signed-off-by: Andrew Morton --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index da43810b7485..48ad69f7722e 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_SWAPPED (1 << 4) #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) /* * struct page_region - Page region with flags -- cgit v1.2.3 From 46eae99ef73302f9fb3dddcd67c374b3dffe8fd6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 25 Oct 2023 16:02:02 +0200 Subject: add statmount(2) syscall Add a way to query attributes of a single mount instead of having to parse the complete /proc/$PID/mountinfo, which might be huge. Lookup the mount the new 64bit mount ID. If a mount needs to be queried based on path, then statx(2) can be used to first query the mount ID belonging to the path. Design is based on a suggestion by Linus: "So I'd suggest something that is very much like "statfsat()", which gets a buffer and a length, and returns an extended "struct statfs" *AND* just a string description at the end." The interface closely mimics that of statx. Handle ASCII attributes by appending after the end of the structure (as per above suggestion). Pointers to strings are stored in u64 members to make the structure the same regardless of pointer size. Strings are nul terminated. Link: https://lore.kernel.org/all/CAHk-=wh5YifP7hzKSbwJj94+DZ2czjrZsczy6GBimiogZws=rg@mail.gmail.com/ Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20231025140205.3586473-5-mszeredi@redhat.com Reviewed-by: Ian Kent [Christian Brauner : various minor changes] Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index bb242fdcfe6b..afdf4f2f6672 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -138,4 +138,57 @@ struct mount_attr { /* List of all mount_attr versions. */ #define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 __spare1; + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 __spare2[50]; + char str[]; /* Variable size part containing strings */ +}; + +struct mnt_id_req { + __u64 mnt_id; + __u64 request_mask; +}; + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From aa0887c4f18e280f8c2aa6964af602bd16c37f54 Mon Sep 17 00:00:00 2001 From: Vinayak Yadawad Date: Wed, 29 Nov 2023 18:20:43 +0530 Subject: wifi: nl80211: Extend del pmksa support for SAE and OWE security Current handling of del pmksa with SSID is limited to FILS security. In the current change the del pmksa support is extended to SAE/OWE security offloads as well. For OWE/SAE offloads, the PMK is generated and cached at driver/FW, so user app needs the capability to request cache deletion based on SSID for drivers supporting SAE/OWE offload. Signed-off-by: Vinayak Yadawad Link: https://msgid.link/ecdae726459e0944c377a6a6f6cb2c34d2e057d0.1701262123.git.vinayak.yadawad@broadcom.com [drop whitespace-damaged rdev_ops pointer completely, enabling tracing] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0cd1da2c2902..8f42d598e285 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -568,7 +568,8 @@ * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS - * authentication. + * authentication. Additionally in case of SAE offload and OWE offloads + * PMKSA entry can be deleted using %NL80211_ATTR_SSID. * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain -- cgit v1.2.3 From d02a12b8e4bbd188f38321849791af02d494c7fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:20 +0200 Subject: wifi: cfg80211: add BSS usage reporting Sometimes there may be reasons for which a BSS that's actually found in scan cannot be used to connect to, for example a nonprimary link of an NSTR mobile AP MLD cannot be used for normal direct connections to it. Not indicating these to userspace as we do now of course avoids being able to connect to them, but it's better if they're shown to userspace and it can make an appropriate decision, without e.g. doing an additional ML probe. Thus add an indication of what a BSS can be used for, currently "normal" and "MLD link", including a reason bitmap for it being not usable. The latter can be extended later for certain BSSes if there are other reasons they cannot be used. Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.0464f25e0b1d.I9f70ca9f1440565ad9a5207d0f4d00a20cca67e7@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8f42d598e285..07fc1fec4b12 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2831,6 +2831,10 @@ enum nl80211_commands { * @NL80211_ATTR_MLO_LINK_DISABLED: Flag attribute indicating that the link is * disabled. * + * @NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA: Include BSS usage data, i.e. + * include BSSes that can only be used in restricted scenarios and/or + * cannot be used at all. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3369,6 +3373,8 @@ enum nl80211_attrs { NL80211_ATTR_MLO_LINK_DISABLED, + NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5032,6 +5038,30 @@ enum nl80211_bss_scan_width { NL80211_BSS_CHAN_WIDTH_2, }; +/** + * enum nl80211_bss_use_for - bitmap indicating possible BSS use + * @NL80211_BSS_USE_FOR_NORMAL: Use this BSS for normal "connection", + * including IBSS/MBSS depending on the type. + * @NL80211_BSS_USE_FOR_MLD_LINK: This BSS can be used as a link in an + * MLO connection. Note that for an MLO connection, all links including + * the assoc link must have this flag set, and the assoc link must + * additionally have %NL80211_BSS_USE_FOR_NORMAL set. + */ +enum nl80211_bss_use_for { + NL80211_BSS_USE_FOR_NORMAL = 1 << 0, + NL80211_BSS_USE_FOR_MLD_LINK = 1 << 1, +}; + +/** + * enum nl80211_bss_cannot_use_reasons - reason(s) connection to a + * BSS isn't possible + * @NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY: NSTR nonprimary links aren't + * supported by the device, and this BSS entry represents one. + */ +enum nl80211_bss_cannot_use_reasons { + NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY = 1 << 0, +}; + /** * enum nl80211_bss - netlink attributes for a BSS * @@ -5084,6 +5114,14 @@ enum nl80211_bss_scan_width { * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz * @NL80211_BSS_MLO_LINK_ID: MLO link ID of the BSS (u8). * @NL80211_BSS_MLD_ADDR: MLD address of this BSS if connected to it. + * @NL80211_BSS_USE_FOR: u32 bitmap attribute indicating what the BSS can be + * used for, see &enum nl80211_bss_use_for. + * @NL80211_BSS_CANNOT_USE_REASONS: Indicates the reason that this BSS cannot + * be used for all or some of the possible uses by the device reporting it, + * even though its presence was detected. + * This is a u64 attribute containing a bitmap of values from + * &enum nl80211_cannot_use_reasons, note that the attribute may be missing + * if no reasons are specified. * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -5111,6 +5149,8 @@ enum nl80211_bss { NL80211_BSS_FREQUENCY_OFFSET, NL80211_BSS_MLO_LINK_ID, NL80211_BSS_MLD_ADDR, + NL80211_BSS_USE_FOR, + NL80211_BSS_CANNOT_USE_REASONS, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From b61e6b41a2f6818ee7b8f92f670a8a6ebcd25a71 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 11 Dec 2023 09:05:22 +0200 Subject: wifi: cfg80211: Add support for setting TID to link mapping Add support for setting the TID to link mapping for a non-AP MLD station. This is useful in cases user space needs to restrict the possible set of active links, e.g., since it got a BSS Transition Management request forcing to use only a subset of the valid links etc. Signed-off-by: Ilan Peer Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.da4d56a5f3ff.Iacf88e943326bf9c169c49b728c4a3445fdedc97@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 07fc1fec4b12..2d8468cbc457 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1328,6 +1328,11 @@ * Multi-Link reconfiguration. %NL80211_ATTR_MLO_LINKS is used to provide * information about the removed STA MLD setup links. * + * @NL80211_CMD_SET_TID_TO_LINK_MAPPING: Set the TID to Link Mapping for a + * non-AP MLD station. The %NL80211_ATTR_MLO_TTLM_DLINK and + * %NL80211_ATTR_MLO_TTLM_ULINK attributes are used to specify the + * TID to Link mapping for downlink/uplink traffic. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1583,6 +1588,8 @@ enum nl80211_commands { NL80211_CMD_LINKS_REMOVED, + NL80211_CMD_SET_TID_TO_LINK_MAPPING, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2835,6 +2842,15 @@ enum nl80211_commands { * include BSSes that can only be used in restricted scenarios and/or * cannot be used at all. * + * @NL80211_ATTR_MLO_TTLM_DLINK: Binary attribute specifying the downlink TID to + * link mapping. The length is 8 * sizeof(u16). For each TID the link + * mapping is as defined in section 9.4.2.314 (TID-To-Link Mapping element) + * in Draft P802.11be_D4.0. + * @NL80211_ATTR_MLO_TTLM_ULINK: Binary attribute specifying the uplink TID to + * link mapping. The length is 8 * sizeof(u16). For each TID the link + * mapping is as defined in section 9.4.2.314 (TID-To-Link Mapping element) + * in Draft P802.11be_D4.0. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3375,6 +3391,9 @@ enum nl80211_attrs { NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA, + NL80211_ATTR_MLO_TTLM_DLINK, + NL80211_ATTR_MLO_TTLM_ULINK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From dc18b89ab113e9c6c7a529316ddf7029fb55132d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Dec 2023 20:06:02 -0700 Subject: io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL io_uring can currently open/close regular files or fixed/direct descriptors. Or you can instantiate a fixed descriptor from a regular one, and then close the regular descriptor. But you currently can't turn a purely fixed/direct descriptor into a regular file descriptor. IORING_OP_FIXED_FD_INSTALL adds support for installing a direct descriptor into the normal file table, just like receiving a file descriptor or opening a new file would do. This is all nicely abstracted into receive_fd(), and hence adding support for this is truly trivial. Since direct descriptors are only usable within io_uring itself, it can be useful to turn them into real file descriptors if they ever need to be accessed via normal syscalls. This can either be a transitory thing, or just a permanent transition for a given direct descriptor. By default, new fds are installed with O_CLOEXEC set. The application can disable O_CLOEXEC by setting IORING_FIXED_FD_NO_CLOEXEC in the sqe->install_fd_flags member. Suggested-by: Christian Brauner Reviewed-by: Christian Brauner Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f1c16f817742..db4b913e6b39 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -71,6 +71,7 @@ struct io_uring_sqe { __u32 uring_cmd_flags; __u32 waitid_flags; __u32 futex_flags; + __u32 install_fd_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -253,6 +254,7 @@ enum io_uring_op { IORING_OP_FUTEX_WAIT, IORING_OP_FUTEX_WAKE, IORING_OP_FUTEX_WAITV, + IORING_OP_FIXED_FD_INSTALL, /* this goes last, obviously */ IORING_OP_LAST, @@ -386,6 +388,13 @@ enum { /* Pass through the flags from sqe->file_index to cqe->flags */ #define IORING_MSG_RING_FLAGS_PASS (1U << 1) +/* + * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags) + * + * IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC + */ +#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 805d4311a54a25d7347684fdf778c6239b190864 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Dec 2023 17:00:05 +0200 Subject: media: v4l2-subdev: Add which field to struct v4l2_subdev_frame_interval Due to a historical mishap, the v4l2_subdev_frame_interval structure is the only part of the V4L2 subdev userspace API that doesn't contain a 'which' field. This prevents trying frame intervals using the subdev 'TRY' state mechanism. Adding a 'which' field is simple as the structure has 8 reserved fields. This would however break userspace as the field is currently set to 0, corresponding to V4L2_SUBDEV_FORMAT_TRY, while the corresponding ioctls currently operate on the 'ACTIVE' state. We thus need to add a new subdev client cap, V4L2_SUBDEV_CLIENT_CAP_INTERVAL_USES_WHICH, to indicate that userspace is aware of this new field. All drivers that implement the subdev .get_frame_interval() and .set_frame_interval() operations are updated to return -EINVAL when operating on the TRY state, preserving the current behaviour. While at it, fix a bad copy&paste in the documentation of the struct v4l2_subdev_frame_interval_enum 'which' field. Signed-off-by: Laurent Pinchart Reviewed-by: Philipp Zabel # for imx-media Reviewed-by: Hans Verkuil Reviewed-by: Luca Ceresoli # for tegra-video Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-subdev.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index f0fbb4a7c150..7048c51581c6 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -116,13 +116,15 @@ struct v4l2_subdev_frame_size_enum { * @pad: pad number, as reported by the media API * @interval: frame interval in seconds * @stream: stream number, defined in subdev routing + * @which: interval type (from enum v4l2_subdev_format_whence) * @reserved: drivers and applications must zero this array */ struct v4l2_subdev_frame_interval { __u32 pad; struct v4l2_fract interval; __u32 stream; - __u32 reserved[8]; + __u32 which; + __u32 reserved[7]; }; /** @@ -133,7 +135,7 @@ struct v4l2_subdev_frame_interval { * @width: frame width in pixels * @height: frame height in pixels * @interval: frame interval in seconds - * @which: format type (from enum v4l2_subdev_format_whence) + * @which: interval type (from enum v4l2_subdev_format_whence) * @stream: stream number, defined in subdev routing * @reserved: drivers and applications must zero this array */ @@ -239,7 +241,14 @@ struct v4l2_subdev_routing { * set (which is the default), the 'stream' fields will be forced to 0 by the * kernel. */ -#define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1ULL << 0) +#define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1ULL << 0) + +/* + * The client is aware of the struct v4l2_subdev_frame_interval which field. If + * this is not set (which is the default), the which field is forced to + * V4L2_SUBDEV_FORMAT_ACTIVE by the kernel. + */ +#define V4L2_SUBDEV_CLIENT_CAP_INTERVAL_USES_WHICH (1ULL << 1) /** * struct v4l2_subdev_client_capability - Capabilities of the client accessing -- cgit v1.2.3 From e6795330f88b4f643c649a02662d47b779340535 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:38 +0100 Subject: xdp: Add VLAN tag hint Implement functionality that enables drivers to expose VLAN tag to XDP code. VLAN tag is represented by 2 variables: - protocol ID, which is passed to bpf code in BE - VLAN TCI, in host byte order Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20231205210847.28460-10-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/netdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** -- cgit v1.2.3 From 13e59344fb9d3c9d3acd138ae320b5b67b658694 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Tue, 12 Dec 2023 17:33:16 -0700 Subject: net: ethtool: add support for symmetric-xor RSS hash Symmetric RSS hash functions are beneficial in applications that monitor both Tx and Rx packets of the same flow (IDS, software firewalls, ..etc). Getting all traffic of the same flow on the same RX queue results in higher CPU cache efficiency. A NIC that supports "symmetric-xor" can achieve this RSS hash symmetry by XORing the source and destination fields and pass the values to the RSS hash algorithm. The user may request RSS hash symmetry for a specific algorithm, via: # ethtool -X eth0 hfunc symmetric-xor or turn symmetry off (asymmetric) by: # ethtool -X eth0 hfunc The specific fields for each flow type should then be specified as usual via: # ethtool -N|-U eth0 rx-flow-hash s|d|f|n Reviewed-by: Wojciech Drewek Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20231213003321.605376-4-ahmed.zaki@intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 13 ++++++++++++- include/uapi/linux/ethtool_netlink.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f7fba0dc87e5..0787d561ace0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1266,6 +1266,8 @@ struct ethtool_rxfh_indir { * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. * @rsvd8: Reserved for future use; see the note on reserved space. * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table @@ -1285,7 +1287,8 @@ struct ethtool_rxfh { __u32 indir_size; __u32 key_size; __u8 hfunc; - __u8 rsvd8[3]; + __u8 input_xfrm; + __u8 rsvd8[2]; __u32 rsvd32; __u32 rss_config[]; }; @@ -1992,6 +1995,14 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define WOL_MODE_COUNT 8 +/* RSS hash function data + * XOR the corresponding source and destination fields of each specified + * protocol. Both copies of the XOR'ed fields are fed into the RSS and RXHASH + * calculation. Note that this XORing reduces the input set entropy and could + * be exploited to reduce the RSS queue spread. + */ +#define RXH_XFRM_SYM_XOR (1 << 0) + /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 73e2c10dc2cc..3f89074aa06c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -908,6 +908,7 @@ enum { ETHTOOL_A_RSS_HFUNC, /* u32 */ ETHTOOL_A_RSS_INDIR, /* binary */ ETHTOOL_A_RSS_HKEY, /* binary */ + ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), -- cgit v1.2.3 From b4c2bea8ceaa50cd42a8f73667389d801a3ecf2d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 25 Oct 2023 16:02:03 +0200 Subject: add listmount(2) syscall Add way to query the children of a particular mount. This is a more flexible way to iterate the mount tree than having to parse /proc/self/mountinfo. Lookup the mount by the new 64bit mount ID. If a mount needs to be queried based on path, then statx(2) can be used to first query the mount ID belonging to the path. Return an array of new (64bit) mount ID's. Without privileges only mounts are listed which are reachable from the task's root. Folded into this patch are several later improvements. Keeping them separate would make the history pointlessly confusing: * Recursive listing of mounts is the default now (cf. [1]). * Remove explicit LISTMOUNT_UNREACHABLE flag (cf. [1]) and fail if mount is unreachable from current root. This also makes permission checking consistent with statmount() (cf. [3]). * Start listing mounts in unique mount ID order (cf. [2]) to allow continuing listmount() from a midpoint. * Allow to continue listmount(). The @request_mask parameter is renamed and to @param to be usable by both statmount() and listmount(). If @param is set to a mount id then listmount() will continue listing mounts from that id on. This allows listing mounts in multiple listmount invocations without having to resize the buffer. If @param is zero then the listing starts from the beginning (cf. [4]). * Don't return EOVERFLOW, instead return the buffer size which allows to detect a full buffer as well (cf. [4]). Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20231025140205.3586473-6-mszeredi@redhat.com Reviewed-by: Ian Kent Link: https://lore.kernel.org/r/20231128160337.29094-2-mszeredi@redhat.com [1] (folded) Link: https://lore.kernel.org/r/20231128160337.29094-3-mszeredi@redhat.com [2] (folded) Link: https://lore.kernel.org/r/20231128160337.29094-4-mszeredi@redhat.com [3] (folded) Link: https://lore.kernel.org/r/20231128160337.29094-5-mszeredi@redhat.com [4] (folded) [Christian Brauner : various smaller fixes] Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index afdf4f2f6672..dc9a0112d819 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -176,9 +176,16 @@ struct statmount { char str[]; /* Variable size part containing strings */ }; +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ struct mnt_id_req { __u64 mnt_id; - __u64 request_mask; + __u64 param; }; /* @@ -191,4 +198,9 @@ struct mnt_id_req { #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 35e27a5744131996061e6e323f1bcb4c827ae867 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Nov 2023 12:27:15 +0100 Subject: fs: keep struct mnt_id_req extensible Make it extensible so that we have the liberty to reuse it in future mount-id based apis. Treat zero size as the first published struct. Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index dc9a0112d819..ad5478dbad00 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -184,10 +184,15 @@ struct statmount { * For listmount(2) @param represents the last listed mount id (or zero). */ struct mnt_id_req { + __u32 size; + __u32 spare; __u64 mnt_id; __u64 param; }; +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ + /* * @mask bits for statmount(2) */ -- cgit v1.2.3 From 074b3cf442c518631f4b6d11d7fdfe143e17e955 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 12 Dec 2023 20:43:15 -0800 Subject: wifi: nl80211: fix grammar & spellos Correct spelling as reported by codespell. Correct run-on sentences and other grammar issues. Add hyphenation of adjectives. Correct some punctuation. Signed-off-by: Randy Dunlap Cc: Johannes Berg Cc: linux-wireless@vger.kernel.org Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Link: https://msgid.link/20231213044315.19459-1-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 74 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 37 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2d8468cbc457..a682b54bd3ba 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -72,7 +72,7 @@ * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows: * - a setup station entry is added, not yet authorized, without any rate - * or capability information, this just exists to avoid race conditions + * or capability information; this just exists to avoid race conditions * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid * to add rate and capability information to the station and at the same * time mark it authorized. @@ -87,7 +87,7 @@ * DOC: Frame transmission/registration support * * Frame transmission and registration support exists to allow userspace - * management entities such as wpa_supplicant react to management frames + * management entities such as wpa_supplicant to react to management frames * that are not being handled by the kernel. This includes, for example, * certain classes of action frames that cannot be handled in the kernel * for various reasons. @@ -113,7 +113,7 @@ * * Frame transmission allows userspace to send for example the required * responses to action frames. It is subject to some sanity checking, - * but many frames can be transmitted. When a frame was transmitted, its + * but many frames can be transmitted. When a frame is transmitted, its * status is indicated to the sending socket. * * For more technical details, see the corresponding command descriptions @@ -123,7 +123,7 @@ /** * DOC: Virtual interface / concurrency capabilities * - * Some devices are able to operate with virtual MACs, they can have + * Some devices are able to operate with virtual MACs; they can have * more than one virtual interface. The capability handling for this * is a bit complex though, as there may be a number of restrictions * on the types of concurrency that are supported. @@ -135,7 +135,7 @@ * Once concurrency is desired, more attributes must be observed: * To start with, since some interface types are purely managed in * software, like the AP-VLAN type in mac80211 for example, there's - * an additional list of these, they can be added at any time and + * an additional list of these; they can be added at any time and * are only restricted by some semantic restrictions (e.g. AP-VLAN * cannot be added without a corresponding AP interface). This list * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute. @@ -164,7 +164,7 @@ * Packet coalesce feature helps to reduce number of received interrupts * to host by buffering these packets in firmware/hardware for some * predefined time. Received interrupt will be generated when one of the - * following events occur. + * following events occurs. * a) Expiration of hardware timer whose expiration time is set to maximum * coalescing delay of matching coalesce rule. * b) Coalescing buffer in hardware reaches its limit. @@ -174,7 +174,7 @@ * rule. * a) Maximum coalescing delay * b) List of packet patterns which needs to be matched - * c) Condition for coalescence. pattern 'match' or 'no match' + * c) Condition for coalescence: pattern 'match' or 'no match' * Multiple such rules can be created. */ @@ -213,7 +213,7 @@ /** * DOC: FILS shared key authentication offload * - * FILS shared key authentication offload can be advertized by drivers by + * FILS shared key authentication offload can be advertised by drivers by * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support * FILS shared key authentication offload should be able to construct the * authentication and association frames for FILS shared key authentication and @@ -239,7 +239,7 @@ * The PMKSA can be maintained in userspace persistently so that it can be used * later after reboots or wifi turn off/on also. * - * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS + * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertised by a FILS * capable AP supporting PMK caching. It specifies the scope within which the * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based @@ -290,12 +290,12 @@ * If the configuration needs to be applied for specific peer then the MAC * address of the peer needs to be passed in %NL80211_ATTR_MAC, otherwise the * configuration will be applied for all the connected peers in the vif except - * any peers that have peer specific configuration for the TID by default; if - * the %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set, peer specific values + * any peers that have peer-specific configuration for the TID by default; if + * the %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set, peer-specific values * will be overwritten. * - * All this configuration is valid only for STA's current connection - * i.e. the configuration will be reset to default when the STA connects back + * All this configuration is valid only for STA's current connection, + * i.e., the configuration will be reset to default when the STA connects back * after disconnection/roaming, and this configuration will be cleared when * the interface goes down. */ @@ -521,7 +521,7 @@ * %NL80211_ATTR_SCHED_SCAN_PLANS. If %NL80211_ATTR_SCHED_SCAN_PLANS is * not specified and only %NL80211_ATTR_SCHED_SCAN_INTERVAL is specified, * scheduled scan will run in an infinite loop with the specified interval. - * These attributes are mutually exculsive, + * These attributes are mutually exclusive, * i.e. NL80211_ATTR_SCHED_SCAN_INTERVAL must not be passed if * NL80211_ATTR_SCHED_SCAN_PLANS is defined. * If for some reason scheduled scan is aborted by the driver, all scan @@ -552,7 +552,7 @@ * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface * is brought down while a scheduled scan was running. * - * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation + * @NL80211_CMD_GET_SURVEY: get survey results, e.g. channel occupation * or noise level * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) @@ -563,7 +563,7 @@ * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID, * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier - * advertized by a FILS capable AP identifying the scope of PMKSA in an + * advertised by a FILS capable AP identifying the scope of PMKSA in an * ESS. * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, @@ -608,7 +608,7 @@ * BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify * the SSID (mainly for association, but is included in authentication * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ + - * %NL80211_ATTR_WIPHY_FREQ_OFFSET is used to specify the frequence of the + * %NL80211_ATTR_WIPHY_FREQ_OFFSET is used to specify the frequency of the * channel in MHz. %NL80211_ATTR_AUTH_TYPE is used to specify the * authentication type. %NL80211_ATTR_IE is used to define IEs * (VendorSpecificInfo, but also including RSN IE and FT IEs) to be added @@ -817,7 +817,7 @@ * reached. * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ * and the attributes determining channel width) the given interface - * (identifed by %NL80211_ATTR_IFINDEX) shall operate on. + * (identified by %NL80211_ATTR_IFINDEX) shall operate on. * In case multiple channels are supported by the device, the mechanism * with which it switches channels is implementation-defined. * When a monitor interface is given, it can only switch channel while @@ -889,7 +889,7 @@ * inform userspace of the new replay counter. * * @NL80211_CMD_PMKSA_CANDIDATE: This is used as an event to inform userspace - * of PMKSA caching dandidates. + * of PMKSA caching candidates. * * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup). * In addition, this can be used as an event to request userspace to take @@ -925,7 +925,7 @@ * * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface * by sending a null data frame to it and reporting when the frame is - * acknowleged. This is used to allow timing out inactive clients. Uses + * acknowledged. This is used to allow timing out inactive clients. Uses * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a * direct reply with an %NL80211_ATTR_COOKIE that is later used to match * up the event with the request. The event includes the same data and @@ -1847,7 +1847,7 @@ enum nl80211_commands { * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER * flag. When used with %NL80211_ATTR_CONTROL_PORT_NO_PREAUTH, pre-auth - * frames are not forwared over the control port. + * frames are not forwarded over the control port. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -1984,10 +1984,10 @@ enum nl80211_commands { * bit. Depending on which antennas are selected in the bitmap, 802.11n * drivers can derive which chainmasks to use (if all antennas belonging to * a particular chain are disabled this chain should be disabled) and if - * a chain has diversity antennas wether diversity should be used or not. + * a chain has diversity antennas whether diversity should be used or not. * HT capabilities (STBC, TX Beamforming, Antenna selection) can be * derived from the available chains after applying the antenna mask. - * Non-802.11n drivers can derive wether to use diversity or not. + * Non-802.11n drivers can derive whether to use diversity or not. * Drivers may reject configurations or RX/TX mask combinations they cannot * support by returning -EINVAL. * @@ -2557,7 +2557,7 @@ enum nl80211_commands { * from successful FILS authentication and is used with * %NL80211_CMD_CONNECT. * - * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP + * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertised by a FILS AP * identifying the scope of PMKSAs. This is used with * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA. * @@ -4200,7 +4200,7 @@ enum nl80211_wmm_rule { * (100 * dBm). * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS * (enum nl80211_dfs_state) - * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long + * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in milliseconds for how long * this channel is in this DFS state. * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this * channel as the control channel @@ -5518,7 +5518,7 @@ enum nl80211_tx_rate_setting { * (%NL80211_TID_CONFIG_ATTR_TIDS, %NL80211_TID_CONFIG_ATTR_OVERRIDE). * @NL80211_TID_CONFIG_ATTR_PEER_SUPP: same as the previous per-vif one, but * per peer instead. - * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if set indicates + * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribute, if set indicates * that the new configuration overrides all previous peer * configurations, otherwise previous peer specific configurations * should be left untouched. @@ -5901,7 +5901,7 @@ enum nl80211_attr_coalesce_rule { /** * enum nl80211_coalesce_condition - coalesce rule conditions - * @NL80211_COALESCE_CONDITION_MATCH: coalaesce Rx packets when patterns + * @NL80211_COALESCE_CONDITION_MATCH: coalesce Rx packets when patterns * in a rule are matched. * @NL80211_COALESCE_CONDITION_NO_MATCH: coalesce Rx packets when patterns * in a rule are not matched. @@ -6000,7 +6000,7 @@ enum nl80211_if_combination_attrs { * enum nl80211_plink_state - state of a mesh peer link finite state machine * * @NL80211_PLINK_LISTEN: initial state, considered the implicit - * state of non existent mesh peer links + * state of non-existent mesh peer links * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to * this mesh peer * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received @@ -6293,7 +6293,7 @@ enum nl80211_feature_flags { * request to use RRM (see %NL80211_ATTR_USE_RRM) with * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set * the ASSOC_REQ_USE_RRM flag in the association request even if - * NL80211_FEATURE_QUIET is not advertized. + * NL80211_FEATURE_QUIET is not advertised. * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air * sniffer which means that it can be configured to hear packets from * certain groups which can be configured by the @@ -6305,7 +6305,7 @@ enum nl80211_feature_flags { * the BSS that the interface that requested the scan is connected to * (if available). * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the - * time the last beacon/probe was received. For a non MLO connection, the + * time the last beacon/probe was received. For a non-MLO connection, the * time is the TSF of the BSS that the interface that requested the scan is * connected to (if available). For an MLO connection, the time is the TSF * of the BSS corresponding with link ID specified in the scan request (if @@ -6313,7 +6313,7 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of * channel dwell time. * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate - * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate. + * configuration (AP/mesh), supporting a legacy (non-HT/VHT) rate. * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate * configuration (AP/mesh) with HT rates. * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate @@ -6649,7 +6649,7 @@ enum nl80211_timeout_reason { * request parameters IE in the probe request * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at - * rate of at least 5.5M. In case non OCE AP is discovered in the channel, + * rate of at least 5.5M. In case non-OCE AP is discovered in the channel, * only the first probe req in the channel will be sent in high rate. * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request * tx deferral (dot11FILSProbeDelay shall be set to 15ms) @@ -6685,7 +6685,7 @@ enum nl80211_timeout_reason { * received on the 2.4/5 GHz channels to actively scan only the 6GHz * channels on which APs are expected to be found. Note that when not set, * the scan logic would scan all 6GHz channels, but since transmission of - * probe requests on non PSC channels is limited, it is highly likely that + * probe requests on non-PSC channels is limited, it is highly likely that * these channels would passively be scanned. Also note that when the flag * is set, in addition to the colocated APs, PSC channels would also be * scanned if the user space has asked for it. @@ -7017,7 +7017,7 @@ enum nl80211_nan_func_term_reason { * The instance ID for the follow up Service Discovery Frame. This is u8. * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type * is follow up. This is a u8. - * The requestor instance ID for the follow up Service Discovery Frame. + * The requester instance ID for the follow up Service Discovery Frame. * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the * follow up Service Discovery Frame. This is a binary attribute. * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a @@ -7407,7 +7407,7 @@ enum nl80211_peer_measurement_attrs { * @NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED: flag attribute indicating if * trigger based ranging measurement is supported * @NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED: flag attribute indicating - * if non trigger based ranging measurement is supported + * if non-trigger-based ranging measurement is supported * * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number @@ -7461,7 +7461,7 @@ enum nl80211_peer_measurement_ftm_capa { * if neither %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED nor * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set, EDCA based * ranging will be used. - * @NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED: request non trigger based + * @NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED: request non-trigger-based * ranging measurement (flag) * This attribute and %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED are * mutually exclusive. @@ -7539,7 +7539,7 @@ enum nl80211_peer_measurement_ftm_failure_reasons { * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames * transmitted (u32, optional) * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames - * that were acknowleged (u32, optional) + * that were acknowledged (u32, optional) * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the * busy peer (u32, seconds) * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent -- cgit v1.2.3 From b059aef76c519226730dd18777c0e15dad4fae21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Dec 2023 17:57:35 -0800 Subject: netlink: specs: mptcp: rename the MPTCP path management spec We assume in handful of places that the name of the spec is the same as the name of the family. We could fix that but it seems like a fair assumption to make. Rename the MPTCP spec instead. Reviewed-by: Mat Martineau Reviewed-by: Donald Hunter Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/mptcp_pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index b5d11aece408..50589e5dd6a3 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN uapi header */ #ifndef _UAPI_LINUX_MPTCP_PM_H -- cgit v1.2.3 From 61fbf20312bdd1394a9cac67ed8f706e205511af Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 14 Dec 2023 12:04:15 +0300 Subject: usb: gadget: f_fs: fix fortify warning When compiling with gcc version 14.0.0 20231206 (experimental) and CONFIG_FORTIFY_SOURCE=y, I've noticed the following warning: ... In function 'fortify_memcpy_chk', inlined from '__ffs_func_bind_do_os_desc' at drivers/usb/gadget/function/f_fs.c:2934:3: ./include/linux/fortify-string.h:588:25: warning: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 588 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This call to 'memcpy()' is interpreted as an attempt to copy both 'CompatibleID' and 'SubCompatibleID' of 'struct usb_ext_compat_desc' from an address of the first one, which causes an overread warning. Since we actually want to copy both of them at once, use the convenient 'struct_group()' and 'sizeof_field()' here. Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20231214090428.27292-1-dmantipov@yandex.ru Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/functionfs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index d77ee6b65328..078098e73fd3 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -73,8 +73,10 @@ struct usb_os_desc_header { struct usb_ext_compat_desc { __u8 bFirstInterfaceNumber; __u8 Reserved1; - __u8 CompatibleID[8]; - __u8 SubCompatibleID[8]; + __struct_group(/* no tag */, IDs, /* no attrs */, + __u8 CompatibleID[8]; + __u8 SubCompatibleID[8]; + ); __u8 Reserved2[6]; }; -- cgit v1.2.3 From a7565fc8399725d00cc006c35d0621a5cb5f9554 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 13 Dec 2023 14:40:14 -0800 Subject: mei: fix spellos in mei.h For include/uapi/linux/mei.h, correct spellos reported by codespell. Signed-off-by: Randy Dunlap Cc: Tomas Winkler Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20231213224014.23187-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mei.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mei.h b/include/uapi/linux/mei.h index 171c5cce3641..68a0272e99b7 100644 --- a/include/uapi/linux/mei.h +++ b/include/uapi/linux/mei.h @@ -100,14 +100,14 @@ struct mei_connect_client_data_vtag { * a FW client on a tagged channel. From this point on, every read * and write will communicate with the associated FW client * on the tagged channel. - * Upone close() the communication is terminated. + * Upon close() the communication is terminated. * * The IOCTL argument is a struct with a union that contains * the input parameter and the output parameter for this IOCTL. * * The input parameter is UUID of the FW Client, a vtag [0,255]. * The output parameter is the properties of the FW client - * (FW protocool version and max message size). + * (FW protocol version and max message size). * * Clients that do not support tagged connection * will respond with -EOPNOTSUPP. -- cgit v1.2.3 From 3634783be125381c6d390938d08cbcc47fed3b73 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 8 Dec 2023 15:28:01 +0000 Subject: binder: use enum for binder ioctls All of the other constants in this file are defined using enums, so make the constants more consistent by defining the ioctls in an enum as well. This is necessary for Rust Binder since the _IO macros are too complicated for bindgen to see that they expand to integer constants. Replacing the #defines with an enum forces bindgen to evaluate them properly, which allows us to access them from Rust. I originally intended to include this change in the first patch of the Rust Binder patchset [1], but at plumbers Carlos Llamas told me that this change has been discussed previously [2] and suggested that I send it upstream separately. Link: https://lore.kernel.org/rust-for-linux/20231101-rust-binder-v1-1-08ba9197f637@google.com/ [1] Link: https://lore.kernel.org/all/YoIK2l6xbQMPGZHy@kroah.com/ [2] Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Link: https://lore.kernel.org/r/20231208152801.3425772-1-aliceryhl@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 5f636b5afcd7..d44a8118b2ed 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -251,20 +251,22 @@ struct binder_extended_error { __s32 param; }; -#define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) -#define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) -#define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) -#define BINDER_SET_IDLE_PRIORITY _IOW('b', 6, __s32) -#define BINDER_SET_CONTEXT_MGR _IOW('b', 7, __s32) -#define BINDER_THREAD_EXIT _IOW('b', 8, __s32) -#define BINDER_VERSION _IOWR('b', 9, struct binder_version) -#define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) -#define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct binder_node_info_for_ref) -#define BINDER_SET_CONTEXT_MGR_EXT _IOW('b', 13, struct flat_binder_object) -#define BINDER_FREEZE _IOW('b', 14, struct binder_freeze_info) -#define BINDER_GET_FROZEN_INFO _IOWR('b', 15, struct binder_frozen_status_info) -#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION _IOW('b', 16, __u32) -#define BINDER_GET_EXTENDED_ERROR _IOWR('b', 17, struct binder_extended_error) +enum { + BINDER_WRITE_READ = _IOWR('b', 1, struct binder_write_read), + BINDER_SET_IDLE_TIMEOUT = _IOW('b', 3, __s64), + BINDER_SET_MAX_THREADS = _IOW('b', 5, __u32), + BINDER_SET_IDLE_PRIORITY = _IOW('b', 6, __s32), + BINDER_SET_CONTEXT_MGR = _IOW('b', 7, __s32), + BINDER_THREAD_EXIT = _IOW('b', 8, __s32), + BINDER_VERSION = _IOWR('b', 9, struct binder_version), + BINDER_GET_NODE_DEBUG_INFO = _IOWR('b', 11, struct binder_node_debug_info), + BINDER_GET_NODE_INFO_FOR_REF = _IOWR('b', 12, struct binder_node_info_for_ref), + BINDER_SET_CONTEXT_MGR_EXT = _IOW('b', 13, struct flat_binder_object), + BINDER_FREEZE = _IOW('b', 14, struct binder_freeze_info), + BINDER_GET_FROZEN_INFO = _IOWR('b', 15, struct binder_frozen_status_info), + BINDER_ENABLE_ONEWAY_SPAM_DETECTION = _IOW('b', 16, __u32), + BINDER_GET_EXTENDED_ERROR = _IOWR('b', 17, struct binder_extended_error), +}; /* * NOTE: Two special error codes you should check for when calling -- cgit v1.2.3 From acd288666979a49538d70e0c0d86e1118b445058 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Nov 2023 15:03:55 +0900 Subject: misc: pci_endpoint_test: Use INTX instead of LEGACY In the root complex pci endpoint test function driver, change macros and functions names using the term "legacy" to use "intx" instead to match the term used in the PCI specifications. Link: https://lore.kernel.org/r/20231122060406.14695-6-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Reviewed-by: Christoph Hellwig --- include/uapi/linux/pcitest.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index f9c1af8d141b..94b46b043b53 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -11,7 +11,8 @@ #define __UAPI_LINUX_PCITEST_H #define PCITEST_BAR _IO('P', 0x1) -#define PCITEST_LEGACY_IRQ _IO('P', 0x2) +#define PCITEST_INTX_IRQ _IO('P', 0x2) +#define PCITEST_LEGACY_IRQ PCITEST_INTX_IRQ #define PCITEST_MSI _IOW('P', 0x3, int) #define PCITEST_WRITE _IOW('P', 0x4, unsigned long) #define PCITEST_READ _IOW('P', 0x5, unsigned long) -- cgit v1.2.3 From 13b127d2578432e1e521310b69944c5a1b30679c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Dec 2023 13:30:00 +0100 Subject: devlink: add a command to set notification filter and use it for multicasts Currently the user listening on a socket for devlink notifications gets always all messages for all existing instances, even if he is interested only in one of those. That may cause unnecessary overhead on setups with thousands of instances present. User is currently able to narrow down the devlink objects replies to dump commands by specifying select attributes. Allow similar approach for notifications. Introduce a new devlink NOTIFY_FILTER_SET which the user passes the select attributes. Store these per-socket and use them for filtering messages during multicast send. Signed-off-by: Jiri Pirko Signed-off-by: Paolo Abeni --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3c8383d342d..130cae0d3e20 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -139,6 +139,8 @@ enum devlink_command { DEVLINK_CMD_SELFTESTS_GET, /* can dump */ DEVLINK_CMD_SELFTESTS_RUN, + DEVLINK_CMD_NOTIFY_FILTER_SET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 -- cgit v1.2.3 From d17aff807f845cf93926c28705216639c7279110 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Dec 2023 07:37:35 -0800 Subject: Revert BPF token-related functionality This patch includes the following revert (one conflicting BPF FS patch and three token patch sets, represented by merge commits): - revert 0f5d5454c723 "Merge branch 'bpf-fs-mount-options-parsing-follow-ups'"; - revert 750e785796bb "bpf: Support uid and gid when mounting bpffs"; - revert 733763285acf "Merge branch 'bpf-token-support-in-libbpf-s-bpf-object'"; - revert c35919dcce28 "Merge branch 'bpf-token-and-bpf-fs-based-delegation'". Link: https://lore.kernel.org/bpf/CAHk-=wg7JuFYwGy=GOMbRCtOL+jwSQsdUaBsRWkDVYbxipbM5A@mail.gmail.com Signed-off-by: Andrii Nakryiko --- include/uapi/linux/bpf.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f4d3090efe..754e68ca8744 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. - * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 849d18e27be9a1253f2318cb4549cc857219d991 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2023 14:21:05 -0800 Subject: md: Remove deprecated CONFIG_MD_LINEAR md-linear has been marked as deprecated for 2.5 years. Remove it. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Neil Brown Cc: Guoqing Jiang Cc: Mateusz Grzonka Cc: Jes Sorensen Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20231214222107.2016042-2-song@kernel.org --- include/uapi/linux/raid/md_p.h | 8 ++------ include/uapi/linux/raid/md_u.h | 7 +------ 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 6c0aa577730f..b36e282a413d 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -2,15 +2,11 @@ /* md_p.h : physical layout of Linux RAID devices Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _MD_P_H @@ -237,7 +233,7 @@ struct mdp_superblock_1 { char set_name[32]; /* set and interpreted by user-space */ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 level; /* -4 (multipath), 0,1,4,5 */ __le32 layout; /* only for raid5 and raid10 currently */ __le64 size; /* used size of component devices, in 512byte sectors */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 105307244961..c285f76e5d8d 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -2,15 +2,11 @@ /* md_u.h : user <=> kernel API between Linux raidtools and RAID drivers Copyright (C) 1998 Ingo Molnar - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _UAPI_MD_U_H @@ -109,7 +105,6 @@ typedef struct mdu_array_info_s { /* non-obvious values for 'level' */ #define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) /* we need a value for 'no level specified' and 0 -- cgit v1.2.3 From d8730f0cf4effa015bc5e8840d8f8fb3cdb01aab Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2023 14:21:06 -0800 Subject: md: Remove deprecated CONFIG_MD_MULTIPATH md-multipath has been marked as deprecated for 2.5 years. Remove it. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Neil Brown Cc: Guoqing Jiang Cc: Mateusz Grzonka Cc: Jes Sorensen Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20231214222107.2016042-3-song@kernel.org --- include/uapi/linux/raid/md_p.h | 2 +- include/uapi/linux/raid/md_u.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index b36e282a413d..5a43c23f53bf 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -233,7 +233,7 @@ struct mdp_superblock_1 { char set_name[32]; /* set and interpreted by user-space */ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* -4 (multipath), 0,1,4,5 */ + __le32 level; /* 0,1,4,5 */ __le32 layout; /* only for raid5 and raid10 currently */ __le64 size; /* used size of component devices, in 512byte sectors */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index c285f76e5d8d..b44bbc356643 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -104,7 +104,6 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; /* non-obvious values for 'level' */ -#define LEVEL_MULTIPATH (-4) #define LEVEL_FAULTY (-5) /* we need a value for 'no level specified' and 0 -- cgit v1.2.3 From 415c7451872b0d037760795edd3961eaa63276ea Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2023 14:21:07 -0800 Subject: md: Remove deprecated CONFIG_MD_FAULTY md-faulty has been marked as deprecated for 2.5 years. Remove it. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Neil Brown Cc: Guoqing Jiang Cc: Mateusz Grzonka Cc: Jes Sorensen Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20231214222107.2016042-4-song@kernel.org --- include/uapi/linux/raid/md_u.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index b44bbc356643..7be89a4906e7 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -103,9 +103,6 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; -/* non-obvious values for 'level' */ -#define LEVEL_FAULTY (-5) - /* we need a value for 'no level specified' and 0 * means 'raid0', so we need something else. This is * for internal use only -- cgit v1.2.3 From 838bebb4c926a573839ff1bfe1b654a6ed0f9779 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 19 Dec 2023 11:32:39 +0200 Subject: virtio: Define feature bit for administration virtqueue Introduce VIRTIO_F_ADMIN_VQ which is used for administration virtqueue support. Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231219093247.170936-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_config.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 8881aea60f6f..2445f365bce7 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 41 +#define VIRTIO_TRANSPORT_F_END 42 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -114,4 +114,10 @@ * This feature indicates that the driver can reset a queue individually. */ #define VIRTIO_F_RING_RESET 40 + +/* + * This feature indicates that the device support administration virtqueues. + */ +#define VIRTIO_F_ADMIN_VQ 41 + #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From fd27ef6b44bec26915c5b2b22c13856d9f0ba17a Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 19 Dec 2023 11:32:40 +0200 Subject: virtio-pci: Introduce admin virtqueue Introduce support for the admin virtqueue. By negotiating VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates one administration virtqueue. Administration virtqueue implementation in virtio pci generic layer, enables multiple types of upper layer drivers such as vfio, net, blk to utilize it. Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231219093247.170936-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 44f4dd2add18..240ddeef7eae 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -175,6 +175,9 @@ struct virtio_pci_modern_common_cfg { __le16 queue_notify_data; /* read-write */ __le16 queue_reset; /* read-write */ + + __le16 admin_queue_index; /* read-only */ + __le16 admin_queue_num; /* read-only */ }; /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ @@ -215,6 +218,8 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 #define VIRTIO_PCI_COMMON_Q_RESET 58 +#define VIRTIO_PCI_COMMON_ADM_Q_IDX 60 +#define VIRTIO_PCI_COMMON_ADM_Q_NUM 62 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From 92792ac752aa80d5ee71bc291d90edd06cd76bd1 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 19 Dec 2023 11:32:41 +0200 Subject: virtio-pci: Introduce admin command sending function Add support for sending admin command through admin virtqueue interface. Abort any inflight admin commands once device reset completes. Activate admin queue when device becomes ready; deactivate on device reset. To comply to the below specification statement [1], the admin virtqueue is activated for upper layer users only after setting DRIVER_OK status. [1] The driver MUST NOT send any buffer available notifications to the device before setting DRIVER_OK. Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231219093247.170936-4-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_pci.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 240ddeef7eae..187fd9e34a30 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -223,4 +223,26 @@ struct virtio_pci_cfg_cap { #endif /* VIRTIO_PCI_NO_MODERN */ +/* Admin command status. */ +#define VIRTIO_ADMIN_STATUS_OK 0 + +struct __packed virtio_admin_cmd_hdr { + __le16 opcode; + /* + * 1 - SR-IOV + * 2-65535 - reserved + */ + __le16 group_type; + /* Unused, reserved for future extensions. */ + __u8 reserved1[12]; + __le64 group_member_id; +}; + +struct __packed virtio_admin_cmd_status { + __le16 status; + __le16 status_qualifier; + /* Unused, reserved for future extensions. */ + __u8 reserved2[4]; +}; + #endif -- cgit v1.2.3 From 388431b9f59bbfde2b5f2fe032b0836158b09ad0 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 19 Dec 2023 11:32:42 +0200 Subject: virtio-pci: Introduce admin commands Introduces admin commands, as follow: The "list query" command can be used by the driver to query the set of admin commands supported by the virtio device. The "list use" command is used to inform the virtio device which admin commands the driver will use. The "legacy common cfg rd/wr" commands are used to read from/write into the legacy common configuration structure. The "legacy dev cfg rd/wr" commands are used to read from/write into the legacy device configuration structure. The "notify info" command is used to query the notification region information. Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231219093247.170936-5-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_pci.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 187fd9e34a30..ef3810dee7ef 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -226,6 +226,20 @@ struct virtio_pci_cfg_cap { /* Admin command status. */ #define VIRTIO_ADMIN_STATUS_OK 0 +/* Admin command opcode. */ +#define VIRTIO_ADMIN_CMD_LIST_QUERY 0x0 +#define VIRTIO_ADMIN_CMD_LIST_USE 0x1 + +/* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 + +/* Transitional device admin command. */ +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE 0x2 +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ 0x3 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE 0x4 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 +#define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + struct __packed virtio_admin_cmd_hdr { __le16 opcode; /* @@ -245,4 +259,31 @@ struct __packed virtio_admin_cmd_status { __u8 reserved2[4]; }; +struct __packed virtio_admin_cmd_legacy_wr_data { + __u8 offset; /* Starting offset of the register(s) to write. */ + __u8 reserved[7]; + __u8 registers[]; +}; + +struct __packed virtio_admin_cmd_legacy_rd_data { + __u8 offset; /* Starting offset of the register(s) to read. */ +}; + +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END 0 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_DEV 0x1 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM 0x2 + +#define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +struct __packed virtio_admin_cmd_notify_info_data { + __u8 flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + __u8 bar; /* BAR of the member or the owner device */ + __u8 padding[6]; + __le64 offset; /* Offset within bar. */ +}; + +struct virtio_admin_cmd_notify_info_result { + struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; +}; + #endif -- cgit v1.2.3 From e37a11fca41864c9f652ff81296b82e6f65a4242 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 Dec 2023 10:32:36 +0200 Subject: bridge: add MDB state mask uAPI attribute Currently, the 'state' field in 'struct br_port_msg' can be set to 1 if the MDB entry is permanent or 0 if it is temporary. Additional states might be added in the future. In a similar fashion to 'NDA_NDM_STATE_MASK', add an MDB state mask uAPI attribute that will allow the upcoming bulk deletion API to bulk delete MDB entries with a certain state or any state. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2e23f99dc0f1..a5b743a2f775 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -757,6 +757,7 @@ enum { MDBE_ATTR_VNI, MDBE_ATTR_IFINDEX, MDBE_ATTR_SRC_VNI, + MDBE_ATTR_STATE_MASK, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) -- cgit v1.2.3 From cbc2fe9d9cb226347365753f50d81bc48cc3c52e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 13 Dec 2023 13:57:41 +0800 Subject: kexec_file: add kexec_file flag to control debug printing Patch series "kexec_file: print out debugging message if required", v4. Currently, specifying '-d' on kexec command will print a lot of debugging informationabout kexec/kdump loading with kexec_load interface. However, kexec_file_load prints nothing even though '-d' is specified. It's very inconvenient to debug or analyze the kexec/kdump loading when something wrong happened with kexec/kdump itself or develper want to check the kexec/kdump loading. In this patchset, a kexec_file flag is KEXEC_FILE_DEBUG added and checked in code. If it's passed in, debugging message of kexec_file code will be printed out and can be seen from console and dmesg. Otherwise, the debugging message is printed like beofre when pr_debug() is taken. Note: **** ===== 1) The code in kexec-tools utility also need be changed to support passing KEXEC_FILE_DEBUG to kernel when 'kexec -s -d' is specified. The patch link is here: ========= [PATCH] kexec_file: add kexec_file flag to support debug printing http://lists.infradead.org/pipermail/kexec/2023-November/028505.html 2) s390 also has kexec_file code, while I am not sure what debugging information is necessary. So leave it to s390 developer. Test: **** ==== Testing was done in v1 on x86_64 and arm64. For v4, tested on x86_64 again. And on x86_64, the printed messages look like below: -------------------------------------------------------------- kexec measurement buffer for the loaded kernel at 0x207fffe000. Loaded purgatory at 0x207fff9000 Loaded boot_param, command line and misc at 0x207fff3000 bufsz=0x1180 memsz=0x1180 Loaded 64bit kernel at 0x207c000000 bufsz=0xc88200 memsz=0x3c4a000 Loaded initrd at 0x2079e79000 bufsz=0x2186280 memsz=0x2186280 Final command line is: root=/dev/mapper/fedora_intel--knightslanding--lb--02-root ro rd.lvm.lv=fedora_intel-knightslanding-lb-02/root console=ttyS0,115200N81 crashkernel=256M E820 memmap: 0000000000000000-000000000009a3ff (1) 000000000009a400-000000000009ffff (2) 00000000000e0000-00000000000fffff (2) 0000000000100000-000000006ff83fff (1) 000000006ff84000-000000007ac50fff (2) ...... 000000207fff6150-000000207fff615f (128) 000000207fff6160-000000207fff714f (1) 000000207fff7150-000000207fff715f (128) 000000207fff7160-000000207fff814f (1) 000000207fff8150-000000207fff815f (128) 000000207fff8160-000000207fffffff (1) nr_segments = 5 segment[0]: buf=0x000000004e5ece74 bufsz=0x211 mem=0x207fffe000 memsz=0x1000 segment[1]: buf=0x000000009e871498 bufsz=0x4000 mem=0x207fff9000 memsz=0x5000 segment[2]: buf=0x00000000d879f1fe bufsz=0x1180 mem=0x207fff3000 memsz=0x2000 segment[3]: buf=0x000000001101cd86 bufsz=0xc88200 mem=0x207c000000 memsz=0x3c4a000 segment[4]: buf=0x00000000c6e38ac7 bufsz=0x2186280 mem=0x2079e79000 memsz=0x2187000 kexec_file_load: type:0, start:0x207fff91a0 head:0x109e004002 flags:0x8 --------------------------------------------------------------------------- This patch (of 7): When specifying 'kexec -c -d', kexec_load interface will print loading information, e.g the regions where kernel/initrd/purgatory/cmdline are put, the memmap passed to 2nd kernel taken as system RAM ranges, and printing all contents of struct kexec_segment, etc. These are very helpful for analyzing or positioning what's happening when kexec/kdump itself failed. The debugging printing for kexec_load interface is made in user space utility kexec-tools. Whereas, with kexec_file_load interface, 'kexec -s -d' print nothing. Because kexec_file code is mostly implemented in kernel space, and the debugging printing functionality is missed. It's not convenient when debugging kexec/kdump loading and jumping with kexec_file_load interface. Now add KEXEC_FILE_DEBUG to kexec_file flag to control the debugging message printing. And add global variable kexec_file_dbg_print and macro kexec_dprintk() to facilitate the printing. This is a preparation, later kexec_dprintk() will be used to replace the existing pr_debug(). Once 'kexec -s -d' is specified, it will print out kexec/kdump loading information. If '-d' is not specified, it regresses to pr_debug(). Link: https://lkml.kernel.org/r/20231213055747.61826-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20231213055747.61826-2-bhe@redhat.com Signed-off-by: Baoquan He Cc: Conor Dooley Cc: Joe Perches Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/uapi/linux/kexec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 01766dd839b0..c17bb096ea68 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -25,6 +25,7 @@ #define KEXEC_FILE_UNLOAD 0x00000001 #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 +#define KEXEC_FILE_DEBUG 0x00000008 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. -- cgit v1.2.3 From 1ef83969bb12e594fe44ceba406095e80a824c91 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 15:12:04 -0500 Subject: uapi/linux/resource.h: fix include We should't be depending on time.h; we should only be pulling in other uapi headers. Signed-off-by: Kent Overstreet --- include/uapi/linux/resource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h index ac5d6a3031db..4fc22908bc09 100644 --- a/include/uapi/linux/resource.h +++ b/include/uapi/linux/resource.h @@ -2,7 +2,7 @@ #ifndef _UAPI_LINUX_RESOURCE_H #define _UAPI_LINUX_RESOURCE_H -#include +#include #include /* -- cgit v1.2.3 From d293b1a89694fc4918d9a4330a71ba2458f9d581 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Dec 2023 09:02:57 -0700 Subject: io_uring/kbuf: add method for returning provided buffer ring head The tail of the provided ring buffer is shared between the kernel and the application, but the head is private to the kernel as the application doesn't need to see it. However, this also prevents the application from knowing how many buffers the kernel has consumed. Usually this is fine, as the information is inherently racy in that the kernel could be consuming buffers continually, but for cleanup purposes it may be relevant to know how many buffers are still left in the ring. Add IORING_REGISTER_PBUF_STATUS which will return status for a given provided buffer ring. Right now it just returns the head, but space is reserved for more information later in, if needed. Link: https://github.com/axboe/liburing/discussions/1020 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index db4b913e6b39..7a673b52827b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -567,6 +567,9 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* return status information for a buffer group */ + IORING_REGISTER_PBUF_STATUS = 26, + /* this goes last */ IORING_REGISTER_LAST, @@ -693,6 +696,13 @@ struct io_uring_buf_reg { __u64 resv[3]; }; +/* argument for IORING_REGISTER_PBUF_STATUS */ +struct io_uring_buf_status { + __u32 buf_group; /* input */ + __u32 head; /* output */ + __u32 resv[8]; +}; + /* * io_uring_restriction->opcode values */ -- cgit v1.2.3 From 41a313d875e0c5822efb50e8221b8d58811609bb Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 20 Dec 2023 13:41:34 +0200 Subject: wifi: cfg80211: reg: Support P2P operation on DFS channels FCC-594280 D01 Section B.3 allows peer-to-peer and ad hoc devices to operate on DFS channels while they operate under the control of a concurrent DFS master. For example, it is possible to have a P2P GO on a DFS channel as long as BSS connection is active on the same channel. Allow such operation by adding additional regulatory flags to indicate DFS concurrent channels and capable devices. Add the required relaxations in DFS regulatory checks. Signed-off-by: Andrei Otcheretianski Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20231220133549.bdfb8a9c7c54.I973563562969a27fea8ec5685b96a3a47afe142f@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a682b54bd3ba..466da830e65f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4256,6 +4256,10 @@ enum nl80211_wmm_rule { * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_PSD: Power spectral density (in dBm) that * is allowed on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_DFS_CONCURRENT: Operation on this channel is + * allowed for peer-to-peer or adhoc communication under the control + * of a DFS master which operates on the same channel (FCC-594280 D01 + * Section B.3). Should be used together with %NL80211_RRF_DFS only. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4295,6 +4299,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_320MHZ, NL80211_FREQUENCY_ATTR_NO_EHT, NL80211_FREQUENCY_ATTR_PSD, + NL80211_FREQUENCY_ATTR_DFS_CONCURRENT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4500,6 +4505,10 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed * @NL80211_RRF_NO_EHT: EHT operation not allowed * @NL80211_RRF_PSD: Ruleset has power spectral density value + * @NL80211_RRF_DFS_CONCURRENT: Operation on this channel is allowed for + peer-to-peer or adhoc communication under the control of a DFS master + which operates on the same channel (FCC-594280 D01 Section B.3). + Should be used together with %NL80211_RRF_DFS only. */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4521,6 +4530,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_320MHZ = 1<<18, NL80211_RRF_NO_EHT = 1<<19, NL80211_RRF_PSD = 1<<20, + NL80211_RRF_DFS_CONCURRENT = 1<<21, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR @@ -6492,6 +6502,11 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_OWE_OFFLOAD_AP: Driver/Device wants to do OWE DH IE * handling in AP mode. * + * @NL80211_EXT_FEATURE_DFS_CONCURRENT: The device supports peer-to-peer or + * ad hoc operation on DFS channels under the control of a concurrent + * DFS master on the same channel as described in FCC-594280 D01 + * (Section B.3). This, for example, allows P2P GO and P2P clients to + * operate on DFS channels as long as there's a concurrent BSS connection. * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6565,6 +6580,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA, NL80211_EXT_FEATURE_OWE_OFFLOAD, NL80211_EXT_FEATURE_OWE_OFFLOAD_AP, + NL80211_EXT_FEATURE_DFS_CONCURRENT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 645f3d85129d8aac3b896ba685fbc20a31c2c036 Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Wed, 20 Dec 2023 13:41:38 +0200 Subject: wifi: cfg80211: handle UHB AP and STA power type UHB AP send supported power type(LPI, SP, VLP) in beacon and probe response IE and STA should connect to these AP only if their regulatory support the AP power type. Beacon/Probe response are reported to userspace with reason "STA regulatory not supporting to connect to AP based on transmitted power type" and it should not connect to AP. Signed-off-by: Mukesh Sisodiya Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20231220133549.cbfbef9170a9.I432f78438de18aa9f5c9006be12e41dc34cc47c5@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 466da830e65f..1ccdcae24372 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4260,6 +4260,10 @@ enum nl80211_wmm_rule { * allowed for peer-to-peer or adhoc communication under the control * of a DFS master which operates on the same channel (FCC-594280 D01 * Section B.3). Should be used together with %NL80211_RRF_DFS only. + * @NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT: Client connection to VLP AP + * not allowed using this channel + * @NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT: Client connection to AFC AP + * not allowed using this channel * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4300,6 +4304,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_EHT, NL80211_FREQUENCY_ATTR_PSD, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT, + NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT, + NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4509,6 +4515,8 @@ enum nl80211_sched_scan_match_attr { peer-to-peer or adhoc communication under the control of a DFS master which operates on the same channel (FCC-594280 D01 Section B.3). Should be used together with %NL80211_RRF_DFS only. + * @NL80211_RRF_NO_UHB_VLP_CLIENT: Client connection to VLP AP not allowed + * @NL80211_RRF_NO_UHB_AFC_CLIENT: Client connection to AFC AP not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4531,6 +4539,8 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_EHT = 1<<19, NL80211_RRF_PSD = 1<<20, NL80211_RRF_DFS_CONCURRENT = 1<<21, + NL80211_RRF_NO_UHB_VLP_CLIENT = 1<<22, + NL80211_RRF_NO_UHB_AFC_CLIENT = 1<<23, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR @@ -5086,9 +5096,12 @@ enum nl80211_bss_use_for { * BSS isn't possible * @NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY: NSTR nonprimary links aren't * supported by the device, and this BSS entry represents one. + * @NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH: STA is not supporting + * the AP power type (SP, VLP, AP) that the AP uses. */ enum nl80211_bss_cannot_use_reasons { NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY = 1 << 0, + NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH = 1 << 1, }; /** -- cgit v1.2.3 From ea67677dbb0d30b993b15790d6cee24c900dd597 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Dec 2023 14:54:37 +0000 Subject: lsm: Add a __counted_by() annotation to lsm_ctx.ctx The ctx in struct lsm_ctx is an array of size ctx_len, tell the compiler about this using __counted_by() where supported to improve the ability to detect overflow issues. Reported-by: Aishwarya TCV Signed-off-by: Mark Brown Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index f0386880a78e..f8aef9ade549 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -9,6 +9,7 @@ #ifndef _UAPI_LINUX_LSM_H #define _UAPI_LINUX_LSM_H +#include #include #include @@ -36,7 +37,7 @@ struct lsm_ctx { __u64 flags; __u64 len; __u64 ctx_len; - __u8 ctx[]; + __u8 ctx[] __counted_by(ctx_len); }; /* -- cgit v1.2.3 From 01fd1617dbc6f558efd1811f2bc433659d1e8304 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Tue, 19 Dec 2023 22:26:14 +0800 Subject: net/smc: support extended GID in SMC-D lgr netlink attribute Virtual ISM devices introduced in SMCv2.1 requires a 128 bit extended GID vs. the existing ISM 64bit GID. So the 2nd 64 bit of extended GID should be included in SMC-D linkgroup netlink attribute as well. Signed-off-by: Wen Gu Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 2 ++ include/uapi/linux/smc_diag.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 837fcd4b0abc..b531e3ef011a 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -160,6 +160,8 @@ enum { SMC_NLA_LGR_D_CHID, /* u16 */ SMC_NLA_LGR_D_PAD, /* flag */ SMC_NLA_LGR_D_V2_COMMON, /* nest */ + SMC_NLA_LGR_D_EXT_GID, /* u64 */ + SMC_NLA_LGR_D_PEER_EXT_GID, /* u64 */ __SMC_NLA_LGR_D_MAX, SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 }; diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 8cb3a6fef553..58eceb7f5df2 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -107,6 +107,8 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ __aligned_u64 my_gid; /* My GID */ __aligned_u64 token; /* Token of DMB */ __aligned_u64 peer_token; /* Token of remote DMBE */ + __aligned_u64 peer_gid_ext; /* Peer GID (extended part) */ + __aligned_u64 my_gid_ext; /* My GID (extended part) */ }; #endif /* _UAPI_SMC_DIAG_H_ */ -- cgit v1.2.3 From 42f39036cda808d3de243192a2cf5125f12f3047 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Tue, 19 Dec 2023 15:16:23 -0300 Subject: net/sched: act_mirred: Allow mirred to block So far the mirred action has dealt with syntax that handles mirror/redirection for netdev. A matching packet is redirected or mirrored to a target netdev. In this patch we enable mirred to mirror to a tc block as well. IOW, the new syntax looks as follows: ... mirred [index INDEX] < | > > Examples of mirroring or redirecting to a tc block: $ tc filter add block 22 protocol ip pref 25 \ flower dst_ip 192.168.0.0/16 action mirred egress mirror blockid 22 $ tc filter add block 22 protocol ip pref 25 \ flower dst_ip 10.10.10.10/32 action mirred egress redirect blockid 22 Co-developed-by: Jamal Hadi Salim Signed-off-by: Jamal Hadi Salim Co-developed-by: Pedro Tammela Signed-off-by: Pedro Tammela Signed-off-by: Victor Nogueira Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_mirred.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 2500a0005d05..c61e76f3c23b 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -21,6 +21,7 @@ enum { TCA_MIRRED_TM, TCA_MIRRED_PARMS, TCA_MIRRED_PAD, + TCA_MIRRED_BLOCKID, __TCA_MIRRED_MAX }; #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) -- cgit v1.2.3 From d0c3891db2d279b2f5ff8fd174e0b09e75dea039 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 19 Dec 2023 16:53:46 -0700 Subject: ethtool: reformat kerneldoc for struct ethtool_link_settings The kernel doc comments for struct ethtool_link_settings includes documentation for three fields that were never present there, leading to these docs-build warnings: ./include/uapi/linux/ethtool.h:2207: warning: Excess struct member 'supported' description in 'ethtool_link_settings' ./include/uapi/linux/ethtool.h:2207: warning: Excess struct member 'advertising' description in 'ethtool_link_settings' ./include/uapi/linux/ethtool.h:2207: warning: Excess struct member 'lp_advertising' description in 'ethtool_link_settings' Remove the entries to make the warnings go away. There was some information there on how data in >link_mode_masks is formatted; move that to the body of the comment to preserve it. Signed-off-by: Jonathan Corbet Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 0787d561ace0..85c412c23ab5 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2139,18 +2139,6 @@ enum ethtool_reset_flags { * refused. For drivers: ignore this field (use kernel's * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will * be overwritten by kernel. - * @supported: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features for which the interface - * supports autonegotiation or auto-detection. Read-only. - * @advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features that are advertised through - * autonegotiation or enabled for auto-detection. - * @lp_advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, and other - * link features that the link partner advertised through - * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. @@ -2192,6 +2180,21 @@ enum ethtool_reset_flags { * %set_link_ksettings() should validate all fields other than @cmd * and @link_mode_masks_nwords that are not described as read-only or * deprecated, and must ignore all fields described as read-only. + * + * @link_mode_masks is divided into three bitfields, each of length + * @link_mode_masks_nwords: + * - supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * - advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * - lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { __u32 cmd; -- cgit v1.2.3 From adef440691bab824e39c1b17382322d195e1fab0 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 6 Dec 2023 02:36:56 -0800 Subject: userfaultfd: UFFDIO_MOVE uABI Implement the uABI of UFFDIO_MOVE ioctl. UFFDIO_COPY performs ~20% better than UFFDIO_MOVE when the application needs pages to be allocated [1]. However, with UFFDIO_MOVE, if pages are available (in userspace) for recycling, as is usually the case in heap compaction algorithms, then we can avoid the page allocation and memcpy (done by UFFDIO_COPY). Also, since the pages are recycled in the userspace, we avoid the need to release (via madvise) the pages back to the kernel [2]. We see over 40% reduction (on a Google pixel 6 device) in the compacting thread's completion time by using UFFDIO_MOVE vs. UFFDIO_COPY. This was measured using a benchmark that emulates a heap compaction implementation using userfaultfd (to allow concurrent accesses by application threads). More details of the usecase are explained in [2]. Furthermore, UFFDIO_MOVE enables moving swapped-out pages without touching them within the same vma. Today, it can only be done by mremap, however it forces splitting the vma. [1] https://lore.kernel.org/all/1425575884-2574-1-git-send-email-aarcange@redhat.com/ [2] https://lore.kernel.org/linux-mm/CA+EESO4uO84SSnBhArH4HvLNhaUQ5nZKNKXqxRCyjniNVjp0Aw@mail.gmail.com/ Update for the ioctl_userfaultfd(2) manpage: UFFDIO_MOVE (Since Linux xxx) Move a continuous memory chunk into the userfault registered range and optionally wake up the blocked thread. The source and destination addresses and the number of bytes to move are specified by the src, dst, and len fields of the uffdio_move structure pointed to by argp: struct uffdio_move { __u64 dst; /* Destination of move */ __u64 src; /* Source of move */ __u64 len; /* Number of bytes to move */ __u64 mode; /* Flags controlling behavior of move */ __s64 move; /* Number of bytes moved, or negated error */ }; The following value may be bitwise ORed in mode to change the behavior of the UFFDIO_MOVE operation: UFFDIO_MOVE_MODE_DONTWAKE Do not wake up the thread that waits for page-fault resolution UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES Allow holes in the source virtual range that is being moved. When not specified, the holes will result in ENOENT error. When specified, the holes will be accounted as successfully moved memory. This is mostly useful to move hugepage aligned virtual regions without knowing if there are transparent hugepages in the regions or not, but preventing the risk of having to split the hugepage during the operation. The move field is used by the kernel to return the number of bytes that was actually moved, or an error (a negated errno- style value). If the value returned in move doesn't match the value that was specified in len, the operation fails with the error EAGAIN. The move field is output-only; it is not read by the UFFDIO_MOVE operation. The operation may fail for various reasons. Usually, remapping of pages that are not exclusive to the given process fail; once KSM might deduplicate pages or fork() COW-shares pages during fork() with child processes, they are no longer exclusive. Further, the kernel might only perform lightweight checks for detecting whether the pages are exclusive, and return -EBUSY in case that check fails. To make the operation more likely to succeed, KSM should be disabled, fork() should be avoided or MADV_DONTFORK should be configured for the source VMA before fork(). This ioctl(2) operation returns 0 on success. In this case, the entire area was moved. On error, -1 is returned and errno is set to indicate the error. Possible errors include: EAGAIN The number of bytes moved (i.e., the value returned in the move field) does not equal the value that was specified in the len field. EINVAL Either dst or len was not a multiple of the system page size, or the range specified by src and len or dst and len was invalid. EINVAL An invalid bit was specified in the mode field. ENOENT The source virtual memory range has unmapped holes and UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES is not set. EEXIST The destination virtual memory range is fully or partially mapped. EBUSY The pages in the source virtual memory range are either pinned or not exclusive to the process. The kernel might only perform lightweight checks for detecting whether the pages are exclusive. To make the operation more likely to succeed, KSM should be disabled, fork() should be avoided or MADV_DONTFORK should be configured for the source virtual memory area before fork(). ENOMEM Allocating memory needed for the operation failed. ESRCH The target process has exited at the time of a UFFDIO_MOVE operation. Link: https://lkml.kernel.org/r/20231206103702.3873743-3-surenb@google.com Signed-off-by: Andrea Arcangeli Signed-off-by: Suren Baghdasaryan Cc: Al Viro Cc: Axel Rasmussen Cc: Brian Geffon Cc: Christian Brauner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport (IBM) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Ryan Roberts Cc: Shuah Khan Cc: ZhangPeng Signed-off-by: Andrew Morton --- include/uapi/linux/userfaultfd.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 0dbc81015018..2841e4ea8f2c 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -41,7 +41,8 @@ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ UFFD_FEATURE_WP_UNPOPULATED | \ UFFD_FEATURE_POISON | \ - UFFD_FEATURE_WP_ASYNC) + UFFD_FEATURE_WP_ASYNC | \ + UFFD_FEATURE_MOVE) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -50,6 +51,7 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_MOVE | \ (__u64)1 << _UFFDIO_WRITEPROTECT | \ (__u64)1 << _UFFDIO_CONTINUE | \ (__u64)1 << _UFFDIO_POISON) @@ -73,6 +75,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_MOVE (0x05) #define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_CONTINUE (0x07) #define _UFFDIO_POISON (0x08) @@ -92,6 +95,8 @@ struct uffdio_copy) #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \ + struct uffdio_move) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ @@ -222,6 +227,9 @@ struct uffdio_api { * asynchronous mode is supported in which the write fault is * automatically resolved and write-protection is un-set. * It implies UFFD_FEATURE_WP_UNPOPULATED. + * + * UFFD_FEATURE_MOVE indicates that the kernel supports moving an + * existing page contents from userspace. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -239,6 +247,7 @@ struct uffdio_api { #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) #define UFFD_FEATURE_POISON (1<<14) #define UFFD_FEATURE_WP_ASYNC (1<<15) +#define UFFD_FEATURE_MOVE (1<<16) __u64 features; __u64 ioctls; @@ -347,6 +356,24 @@ struct uffdio_poison { __s64 updated; }; +struct uffdio_move { + __u64 dst; + __u64 src; + __u64 len; + /* + * Especially if used to atomically remove memory from the + * address space the wake on the dst range is not needed. + */ +#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1) + __u64 mode; + /* + * "move" is written by the ioctl and must be at the end: the + * copy_from_user will not read the last 8 bytes. + */ + __s64 move; +}; + /* * Flags for the userfaultfd(2) system call itself. */ -- cgit v1.2.3 From 02018c544ef113e980a2349eba89003d6f399d22 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:34 +0100 Subject: net: phy: Introduce ethernet link topology representation Link topologies containing multiple network PHYs attached to the same net_device can be found when using a PHY as a media converter for use with an SFP connector, on which an SFP transceiver containing a PHY can be used. With the current model, the transceiver's PHY can't be used for operations such as cable testing, timestamping, macsec offload, etc. The reason being that most of the logic for these configuration, coming from either ethtool netlink or ioctls tend to use netdev->phydev, which in multi-phy systems will reference the PHY closest to the MAC. Introduce a numbering scheme allowing to enumerate PHY devices that belong to any netdev, which can in turn allow userspace to take more precise decisions with regard to each PHY's configuration. The numbering is maintained per-netdev, in a phy_device_list. The numbering works similarly to a netdevice's ifindex, with identifiers that are only recycled once INT_MAX has been reached. This prevents races that could occur between PHY listing and SFP transceiver removal/insertion. The identifiers are assigned at phy_attach time, as the numbering depends on the netdevice the phy is attached to. Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 85c412c23ab5..60801df9d8c0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2219,4 +2219,20 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; + +/** + * enum phy_upstream - Represents the upstream component a given PHY device + * is connected to, as in what is on the other end of the MII bus. Most PHYs + * will be attached to an Ethernet MAC controller, but in some cases, there's + * an intermediate PHY used as a media-converter, which will driver another + * MII interface as its output. + * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, + * or ethernet controller) + * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) + */ +enum phy_upstream { + PHY_UPSTREAM_MAC, + PHY_UPSTREAM_PHY, +}; + #endif /* _UAPI_LINUX_ETHTOOL_H */ -- cgit v1.2.3 From 2ab0edb505faa9ac90dee1732571390f074e8113 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:38 +0100 Subject: net: ethtool: Allow passing a phy index for some commands Some netlink commands are target towards ethernet PHYs, to control some of their features. As there's several such commands, add the ability to pass a PHY index in the ethnl request, which will populate the generic ethnl_req_info with the relevant phydev when the command targets a PHY. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 3f89074aa06c..422e8cfdd98c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -133,6 +133,7 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, -- cgit v1.2.3 From 63d5eaf35ac36cad00cfb3809d794ef0078c822b Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:40 +0100 Subject: net: ethtool: Introduce a command to list PHYs on an interface As we have the ability to track the PHYs connected to a net_device through the link_topology, we can expose this list to userspace. This allows userspace to use these identifiers for phy-specific commands and take the decision of which PHY to target by knowing the link topology. Add PHY_GET and PHY_DUMP, which can be a filtered DUMP operation to list devices on only one interface. Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 422e8cfdd98c..00cd7ad16709 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_PHY_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,8 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -977,6 +980,32 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +enum { + ETHTOOL_A_PHY_UPSTREAM_UNSPEC, + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_UPSTREAM_CNT, + ETHTOOL_A_PHY_UPSTREAM_MAX = (__ETHTOOL_A_PHY_UPSTREAM_CNT - 1) +}; + +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u8 */ + ETHTOOL_A_PHY_UPSTREAM, /* nest - _A_PHY_UPSTREAM_* */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_ID, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From ba24ea129126362e7139fed4e13701ca5b71ac0b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 21 Dec 2023 16:31:03 -0500 Subject: net/sched: Retire ipt action The tc ipt action was intended to run all netfilter/iptables target. Unfortunately it has not benefitted over the years from proper updates when netfilter changes, and for that reason it has remained rudimentary. Pinging a bunch of people that i was aware were using this indicates that removing it wont affect them. Retire it to reduce maintenance efforts. Buh-bye. Reviewed-by: Victor Noguiera Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 4 ++-- include/uapi/linux/tc_act/tc_ipt.h | 20 -------------------- 2 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 include/uapi/linux/tc_act/tc_ipt.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index c7082cc60d21..2fec9b51d28d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -99,7 +99,7 @@ enum { * versions. */ #define TCA_ACT_GACT 5 -#define TCA_ACT_IPT 6 +#define TCA_ACT_IPT 6 /* obsoleted, can be reused */ #define TCA_ACT_PEDIT 7 #define TCA_ACT_MIRRED 8 #define TCA_ACT_NAT 9 @@ -120,7 +120,7 @@ enum tca_id { TCA_ID_UNSPEC = 0, TCA_ID_POLICE = 1, TCA_ID_GACT = TCA_ACT_GACT, - TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_IPT = TCA_ACT_IPT, /* Obsoleted, can be reused */ TCA_ID_PEDIT = TCA_ACT_PEDIT, TCA_ID_MIRRED = TCA_ACT_MIRRED, TCA_ID_NAT = TCA_ACT_NAT, diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h deleted file mode 100644 index c48d7da6750d..000000000000 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_TC_IPT_H -#define __LINUX_TC_IPT_H - -#include - -enum { - TCA_IPT_UNSPEC, - TCA_IPT_TABLE, - TCA_IPT_HOOK, - TCA_IPT_INDEX, - TCA_IPT_CNT, - TCA_IPT_TM, - TCA_IPT_TARG, - TCA_IPT_PAD, - __TCA_IPT_MAX -}; -#define TCA_IPT_MAX (__TCA_IPT_MAX - 1) - -#endif -- cgit v1.2.3 From 41bc3e8fc1f728085da0ca6dbc1bef4a2ddb543c Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:50 -0500 Subject: net/sched: Remove uapi support for rsvp classifier commit 265b4da82dbf ("net/sched: Retire rsvp classifier") retired the TC RSVP classifier. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 2fec9b51d28d..fe922b61b99e 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -280,37 +280,6 @@ struct tc_u32_pcnt { #define TC_U32_MAXDEPTH 8 - -/* RSVP filter */ - -enum { - TCA_RSVP_UNSPEC, - TCA_RSVP_CLASSID, - TCA_RSVP_DST, - TCA_RSVP_SRC, - TCA_RSVP_PINFO, - TCA_RSVP_POLICE, - TCA_RSVP_ACT, - __TCA_RSVP_MAX -}; - -#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) - -struct tc_rsvp_gpi { - __u32 key; - __u32 mask; - int offset; -}; - -struct tc_rsvp_pinfo { - struct tc_rsvp_gpi dpi; - struct tc_rsvp_gpi spi; - __u8 protocol; - __u8 tunnelid; - __u8 tunnelhdr; - __u8 pad; -}; - /* ROUTE filter */ enum { -- cgit v1.2.3 From 82b2545ed9a465e4c470d2dbbb461522f767c56f Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:51 -0500 Subject: net/sched: Remove uapi support for tcindex classifier commit 8c710f75256b ("net/sched: Retire tcindex classifier") retired the TC tcindex classifier. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index fe922b61b99e..ea277039f89d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -310,22 +310,6 @@ enum { #define TCA_FW_MAX (__TCA_FW_MAX - 1) -/* TC index filter */ - -enum { - TCA_TCINDEX_UNSPEC, - TCA_TCINDEX_HASH, - TCA_TCINDEX_MASK, - TCA_TCINDEX_SHIFT, - TCA_TCINDEX_FALL_THROUGH, - TCA_TCINDEX_CLASSID, - TCA_TCINDEX_POLICE, - TCA_TCINDEX_ACT, - __TCA_TCINDEX_MAX -}; - -#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) - /* Flow filter */ enum { -- cgit v1.2.3 From fe3b739a5472968d8d349522b6816bc4db82bc0f Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:52 -0500 Subject: net/sched: Remove uapi support for dsmark qdisc Commit bbe77c14ee61 ("net/sched: Retire dsmark qdisc") retired the dsmark classifier. Remove UAPI support for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f762a10bfb78..1e3a2b9ddf7e 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -557,20 +557,6 @@ enum { #define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE, - __TCA_DSMARK_MAX, -}; - -#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) - /* ATM section */ enum { -- cgit v1.2.3 From 26cc8714fc7f79a806c3d7ffa215b984c384ab4d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:53 -0500 Subject: net/sched: Remove uapi support for ATM qdisc Commit fb38306ceb9e ("net/sched: Retire ATM qdisc") retired the ATM qdisc. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 1e3a2b9ddf7e..28f08acdad52 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -557,21 +557,6 @@ enum { #define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ - __TCA_ATM_MAX, -}; - -#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) - /* Network emulator */ enum { -- cgit v1.2.3 From 33241dca486264193ed68167c8eeae1fb197f3df Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:54 -0500 Subject: net/sched: Remove uapi support for CBQ qdisc Commit 051d44209842 ("net/sched: Retire CBQ qdisc") retired the CBQ qdisc. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 80 ------------------------------------------ 1 file changed, 80 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 28f08acdad52..a3cd0c2dc995 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -477,86 +477,6 @@ enum { #define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt { - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt { - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl { - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u16 pad; - __u32 penalty; -}; - -struct tc_cbq_police { - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt { - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats { - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum { - TCA_CBQ_UNSPEC, - TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, - __TCA_CBQ_MAX, -}; - -#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) - /* Network emulator */ enum { -- cgit v1.2.3 From 0dd415d155050f5c1cf360b97f905d42d44f33ed Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 21 Dec 2023 11:42:35 -0700 Subject: net: ethtool: add a NO_CHANGE uAPI for new RXFH's input_xfrm Add a NO_CHANGE uAPI value for the new RXFH/RSS input_xfrm uAPI field. This needed so that user-space can set other RSS values (hkey or indir table) without affecting input_xfrm. Should have been part of [1]. Link: https://lore.kernel.org/netdev/20231213003321.605376-1-ahmed.zaki@intel.com/ [1] Fixes: 13e59344fb9d ("net: ethtool: add support for symmetric-xor RSS hash") Reviewed-by: Jacob Keller Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20231221184235.9192-3-ahmed.zaki@intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 60801df9d8c0..01ba529dbb6d 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2002,6 +2002,7 @@ static inline int ethtool_validate_duplex(__u8 duplex) * be exploited to reduce the RSS queue spread. */ #define RXH_XFRM_SYM_XOR (1 << 0) +#define RXH_XFRM_NO_CHANGE 0xff /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ -- cgit v1.2.3 From 76ac8e29855b06331d77a1d237a28ce97ac67a38 Mon Sep 17 00:00:00 2001 From: Crescent CY Hsieh Date: Fri, 1 Dec 2023 15:15:53 +0800 Subject: tty: serial: Cleanup the bit shift with macro This patch replaces the bit shift code with "_BITUL()" macro inside "serial_rs485" struct. Signed-off-by: Crescent CY Hsieh Link: https://lore.kernel.org/r/20231201071554.258607-2-crescentcy.hsieh@moxa.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index 53bc1af67a41..6c75ebdd7797 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -11,6 +11,7 @@ #ifndef _UAPI_LINUX_SERIAL_H #define _UAPI_LINUX_SERIAL_H +#include #include #include @@ -140,14 +141,14 @@ struct serial_icounter_struct { */ struct serial_rs485 { __u32 flags; -#define SER_RS485_ENABLED (1 << 0) -#define SER_RS485_RTS_ON_SEND (1 << 1) -#define SER_RS485_RTS_AFTER_SEND (1 << 2) -#define SER_RS485_RX_DURING_TX (1 << 4) -#define SER_RS485_TERMINATE_BUS (1 << 5) -#define SER_RS485_ADDRB (1 << 6) -#define SER_RS485_ADDR_RECV (1 << 7) -#define SER_RS485_ADDR_DEST (1 << 8) +#define SER_RS485_ENABLED _BITUL(0) +#define SER_RS485_RTS_ON_SEND _BITUL(1) +#define SER_RS485_RTS_AFTER_SEND _BITUL(2) +#define SER_RS485_RX_DURING_TX _BITUL(3) +#define SER_RS485_TERMINATE_BUS _BITUL(4) +#define SER_RS485_ADDRB _BITUL(5) +#define SER_RS485_ADDR_RECV _BITUL(6) +#define SER_RS485_ADDR_DEST _BITUL(7) __u32 delay_rts_before_send; __u32 delay_rts_after_send; -- cgit v1.2.3 From 6056f20f27e99fb67582f299468328505f130e36 Mon Sep 17 00:00:00 2001 From: Crescent CY Hsieh Date: Fri, 1 Dec 2023 15:15:54 +0800 Subject: tty: serial: Add RS422 flag to struct serial_rs485 Add "SER_RS485_MODE_RS422" flag to struct serial_rs485, so that serial port can switch interface into RS422 if supported by using ioctl command "TIOCSRS485". By treating RS422 as a mode of RS485, which means while enabling RS422 there are two flags need to be set (SER_RS485_ENABLED and SER_RS485_MODE_RS422), it would make things much easier. For example some places that checks for "SER_RS485_ENABLED" won't need to be rewritten. Signed-off-by: Crescent CY Hsieh Link: https://lore.kernel.org/r/20231201071554.258607-3-crescentcy.hsieh@moxa.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index 6c75ebdd7797..9086367db043 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -138,6 +138,7 @@ struct serial_icounter_struct { * * %SER_RS485_ADDRB - Enable RS485 addressing mode. * * %SER_RS485_ADDR_RECV - Receive address filter (enables @addr_recv). Requires %SER_RS485_ADDRB. * * %SER_RS485_ADDR_DEST - Destination address (enables @addr_dest). Requires %SER_RS485_ADDRB. + * * %SER_RS485_MODE_RS422 - Enable RS422. Requires %SER_RS485_ENABLED. */ struct serial_rs485 { __u32 flags; @@ -149,6 +150,7 @@ struct serial_rs485 { #define SER_RS485_ADDRB _BITUL(5) #define SER_RS485_ADDR_RECV _BITUL(6) #define SER_RS485_ADDR_DEST _BITUL(7) +#define SER_RS485_MODE_RS422 _BITUL(8) __u32 delay_rts_before_send; __u32 delay_rts_after_send; -- cgit v1.2.3 From 98e20e5e13d2811898921f999288be7151a11954 Mon Sep 17 00:00:00 2001 From: Quentin Deslandes Date: Tue, 26 Dec 2023 14:07:42 +0100 Subject: bpfilter: remove bpfilter bpfilter was supposed to convert iptables filtering rules into BPF programs on the fly, from the kernel, through a usermode helper. The base code for the UMH was introduced in 2018, and couple of attempts (2, 3) tried to introduce the BPF program generate features but were abandoned. bpfilter now sits in a kernel tree unused and unusable, occasionally causing confusion amongst Linux users (4, 5). As bpfilter is now developed in a dedicated repository on GitHub (6), it was suggested a couple of times this year (LSFMM/BPF 2023, LPC 2023) to remove the deprecated kernel part of the project. This is the purpose of this patch. [1]: https://lore.kernel.org/lkml/20180522022230.2492505-1-ast@kernel.org/ [2]: https://lore.kernel.org/bpf/20210829183608.2297877-1-me@ubique.spb.ru/#t [3]: https://lore.kernel.org/lkml/20221224000402.476079-1-qde@naccy.de/ [4]: https://dxuuu.xyz/bpfilter.html [5]: https://github.com/linuxkit/linuxkit/pull/3904 [6]: https://github.com/facebook/bpfilter Signed-off-by: Quentin Deslandes Link: https://lore.kernel.org/r/20231226130745.465988-1-qde@naccy.de Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpfilter.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/uapi/linux/bpfilter.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h deleted file mode 100644 index cbc1f5813f50..000000000000 --- a/include/uapi/linux/bpfilter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_BPFILTER_H -#define _UAPI_LINUX_BPFILTER_H - -#include - -enum { - BPFILTER_IPT_SO_SET_REPLACE = 64, - BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, - BPFILTER_IPT_SET_MAX, -}; - -enum { - BPFILTER_IPT_SO_GET_INFO = 64, - BPFILTER_IPT_SO_GET_ENTRIES = 65, - BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, - BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, - BPFILTER_IPT_GET_MAX, -}; - -#endif /* _UAPI_LINUX_BPFILTER_H */ -- cgit v1.2.3 From fe1eb24bd5ade085914248c527044e942f75e06a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 4 Jan 2024 16:04:35 -0800 Subject: Revert "Introduce PHY listing and link_topology tracking" This reverts commit 32bb4515e34469975abc936deb0a116c4a445817. This reverts commit d078d480639a4f3b5fc2d56247afa38e0956483a. This reverts commit fcc4b105caa4b844bf043375bf799c20a9c99db1. This reverts commit 345237dbc1bdbb274c9fb9ec38976261ff4a40b8. This reverts commit 7db69ec9cfb8b4ab50420262631fb2d1908b25bf. This reverts commit 95132a018f00f5dad38bdcfd4180d1af955d46f6. This reverts commit 63d5eaf35ac36cad00cfb3809d794ef0078c822b. This reverts commit c29451aefcb42359905d18678de38e52eccb3bb5. This reverts commit 2ab0edb505faa9ac90dee1732571390f074e8113. This reverts commit dedd702a35793ab462fce4c737eeba0badf9718e. This reverts commit 034fcc210349b873ece7356905be5c6ca11eef2a. This reverts commit 9c5625f559ad6fe9f6f733c11475bf470e637d34. This reverts commit 02018c544ef113e980a2349eba89003d6f399d22. Looks like we need more time for reviews, and incremental changes will be hard to make sense of. So revert. Link: https://lore.kernel.org/all/ZZP6FV5sXEf+xd58@shell.armlinux.org.uk/ Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 16 ---------------- include/uapi/linux/ethtool_netlink.h | 30 ------------------------------ 2 files changed, 46 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 01ba529dbb6d..06ef6b78b7de 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2220,20 +2220,4 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; - -/** - * enum phy_upstream - Represents the upstream component a given PHY device - * is connected to, as in what is on the other end of the MII bus. Most PHYs - * will be attached to an Ethernet MAC controller, but in some cases, there's - * an intermediate PHY used as a media-converter, which will driver another - * MII interface as its output. - * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, - * or ethernet controller) - * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) - */ -enum phy_upstream { - PHY_UPSTREAM_MAC, - PHY_UPSTREAM_PHY, -}; - #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 00cd7ad16709..3f89074aa06c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,7 +57,6 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_PHY_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -110,8 +109,6 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_MSG_PHY_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -136,7 +133,6 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, @@ -980,32 +976,6 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -enum { - ETHTOOL_A_PHY_UPSTREAM_UNSPEC, - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_UPSTREAM_CNT, - ETHTOOL_A_PHY_UPSTREAM_MAX = (__ETHTOOL_A_PHY_UPSTREAM_CNT - 1) -}; - -enum { - ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u8 */ - ETHTOOL_A_PHY_UPSTREAM, /* nest - _A_PHY_UPSTREAM_* */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_ID, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_CNT, - ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) -}; - /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 8a6286c1804e2c7144aef3154a0357c4b496e10b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 3 Jan 2024 14:28:36 +0100 Subject: dpll: expose fractional frequency offset value to user Add a new netlink attribute to expose fractional frequency offset value for a pin. Add an op to get the value from the driver. Signed-off-by: Jiri Pirko Acked-by: Vadim Fedorenko Acked-by: Arkadiusz Kubalewski Link: https://lore.kernel.org/r/20240103132838.1501801-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 715a491d2727..b4e947f9bfbc 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -179,6 +179,7 @@ enum dpll_a_pin { DPLL_A_PIN_PHASE_ADJUST_MAX, DPLL_A_PIN_PHASE_ADJUST, DPLL_A_PIN_PHASE_OFFSET, + DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.2.3 From 35967bdcff325f4572b21b0d0005318da7e03f53 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Wed, 20 Dec 2023 12:49:06 -0800 Subject: virtio_pmem: support feature SHMEM_REGION This patch adds the support for feature VIRTIO_PMEM_F_SHMEM_REGION (virtio spec v1.2 section 5.19.5.2 [1]). During feature negotiation, if VIRTIO_PMEM_F_SHMEM_REGION is offered by the device, the driver looks for a shared memory region of id 0. If it is found, this feature is understood. Otherwise, this feature bit is cleared. During probe, if VIRTIO_PMEM_F_SHMEM_REGION has been negotiated, virtio pmem ignores the `start` and `size` fields in device config and uses the physical address range of shared memory region 0. [1] https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-6480002 Signed-off-by: Changyuan Lyu Message-Id: <20231220204906.566922-1-changyuanl@google.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/virtio_pmem.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h index d676b3620383..ede4f3564977 100644 --- a/include/uapi/linux/virtio_pmem.h +++ b/include/uapi/linux/virtio_pmem.h @@ -14,6 +14,13 @@ #include #include +/* Feature bits */ +/* guest physical address range will be indicated as shared memory region 0 */ +#define VIRTIO_PMEM_F_SHMEM_REGION 0 + +/* shmid of the shared memory region corresponding to the pmem */ +#define VIRTIO_PMEM_SHMEM_REGION_ID 0 + struct virtio_pmem_config { __le64 start; __le64 size; -- cgit v1.2.3 From 8c6eabae3807e048b9f17733af5e20500fbf858c Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 10 Jan 2024 20:10:09 -0800 Subject: iommufd: Add IOMMU_HWPT_INVALIDATE In nested translation, the stage-1 page table is user-managed but cached by the IOMMU hardware, so an update on present page table entries in the stage-1 page table should be followed with a cache invalidation. Add an IOMMU_HWPT_INVALIDATE ioctl to support such a cache invalidation. It takes hwpt_id to specify the iommu_domain, and a multi-entry array to support multiple invalidation data in one ioctl. enum iommu_hwpt_invalidate_data_type is defined to tag the data type of the entries in the multi-entry array. Link: https://lore.kernel.org/r/20240111041015.47920-3-yi.l.liu@intel.com Reviewed-by: Kevin Tian Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 0b2bc6252e2c..824560c50ec6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -49,6 +49,7 @@ enum { IOMMUFD_CMD_GET_HW_INFO, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, + IOMMUFD_CMD_HWPT_INVALIDATE, }; /** @@ -613,4 +614,46 @@ struct iommu_hwpt_get_dirty_bitmap { #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) +/** + * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation + * Data Type + * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 + */ +enum iommu_hwpt_invalidate_data_type { + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, +}; + +/** + * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) + * @size: sizeof(struct iommu_hwpt_invalidate) + * @hwpt_id: ID of a nested HWPT for cache invalidation + * @data_uptr: User pointer to an array of driver-specific cache invalidation + * data. + * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data + * type of all the entries in the invalidation request array. It + * should be a type supported by the hwpt pointed by @hwpt_id. + * @entry_len: Length (in bytes) of a request entry in the request array + * @entry_num: Input the number of cache invalidation requests in the array. + * Output the number of requests successfully handled by kernel. + * @__reserved: Must be 0. + * + * Invalidate the iommu cache for user-managed page table. Modifications on a + * user-managed page table should be followed by this operation to sync cache. + * Each ioctl can support one or more cache invalidation requests in the array + * that has a total size of @entry_len * @entry_num. + * + * An empty invalidation request array by setting @entry_num==0 is allowed, and + * @entry_len and @data_uptr would be ignored in this case. This can be used to + * check if the given @data_type is supported or not by kernel. + */ +struct iommu_hwpt_invalidate { + __u32 size; + __u32 hwpt_id; + __aligned_u64 data_uptr; + __u32 data_type; + __u32 entry_len; + __u32 entry_num; + __u32 __reserved; +}; +#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) #endif -- cgit v1.2.3 From 393a5778b72a7b551493d0fd3fbe0282154058fe Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 10 Jan 2024 20:10:14 -0800 Subject: iommufd: Add data structure for Intel VT-d stage-1 cache invalidation This adds the data structure invalidating caches for the nested domain allocated with IOMMU_HWPT_DATA_VTD_S1 type. Link: https://lore.kernel.org/r/20240111041015.47920-8-yi.l.liu@intel.com Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 824560c50ec6..1dfeaa2e649e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -623,6 +623,42 @@ enum iommu_hwpt_invalidate_data_type { IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, }; +/** + * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d + * stage-1 cache invalidation + * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies + * to all-levels page structure cache or just + * the leaf PTE cache. + */ +enum iommu_hwpt_vtd_s1_invalidate_flags { + IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0, +}; + +/** + * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation + * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) + * @addr: The start address of the range to be invalidated. It needs to + * be 4KB aligned. + * @npages: Number of contiguous 4K pages to be invalidated. + * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags + * @__reserved: Must be 0 + * + * The Intel VT-d specific invalidation data for user-managed stage-1 cache + * invalidation in nested translation. Userspace uses this structure to + * tell the impacted cache scope after modifying the stage-1 page table. + * + * Invalidating all the caches related to the page table by setting @addr + * to be 0 and @npages to be U64_MAX. + * + * The device TLB will be invalidated automatically if ATS is enabled. + */ +struct iommu_hwpt_vtd_s1_invalidate { + __aligned_u64 addr; + __aligned_u64 npages; + __u32 flags; + __u32 __reserved; +}; + /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) -- cgit v1.2.3