From a9e4fb51425f680f191bb65c2778cea31bf058c7 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 9 Jan 2022 11:37:04 -0500 Subject: drm/panfrost: Update create_bo flags comment Update a comment stating create_bo took no flags, since it now takes a bit mask of optional flags NOEXEC and HEAP. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20220109163704.2564-1-alyssa.rosenzweig@collabora.com --- include/uapi/drm/panfrost_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 061e700dd06c..9e40277d8185 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -84,14 +84,14 @@ struct drm_panfrost_wait_bo { __s64 timeout_ns; /* absolute */ }; +/* Valid flags to pass to drm_panfrost_create_bo */ #define PANFROST_BO_NOEXEC 1 #define PANFROST_BO_HEAP 2 /** * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. * - * There are currently no values for the flags argument, but it may be - * used in a future extension. + * The flags argument is a bit mask of PANFROST_BO_* flags. */ struct drm_panfrost_create_bo { __u32 size; -- cgit v1.2.3 From c4381d0ee81930097e94e55d1c23f85798ffd093 Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Wed, 12 Jan 2022 10:34:11 -0500 Subject: drm/amdgpu: Add interface to load SRIOV cap FW - Add interface to load SRIOV cap FW. If the FW does not exist, simply skip this FW loading routine. This FW will only be loaded under SRIOV. Other driver configuration will not be affected. By adding this interface, it will make us easier to prepare SRIOV Linux guest driver for different users. - Update sysfs interface to read cap FW version. - Refactor PSP FW loading routine under SRIOV to use a unified SWITCH statement instead of using IF statement - Remove redundant amdgpu_sriov_vf() check in FW loading routine Acked-by: Monk Liu Acked-by: Guchun Chen Signed-off-by: Bokun Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0b94ec7b73e7..be4f9111f478 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -728,6 +728,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_DMCUB 0x14 /* Subquery id: Query TOC firmware version */ #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f -- cgit v1.2.3 From e40fbbf0572c5e41dc87ad79001748ed399ce32d Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 19 Jan 2022 11:44:40 +0000 Subject: uapi/bpf: Add missing description and returns for helper documentation Both description and returns section will become mandatory for helpers and syscalls in a later commit to generate man pages. This commit also adds in the documentation that BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN for anyone searching for the syscall in the generated man pages. Signed-off-by: Usama Arif Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220119114442.1452088-1-usama.arif@bytedance.com --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9a..a9c96c21330a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1775,6 +1777,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1786,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2262,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -2369,6 +2377,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2476,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3252,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. -- cgit v1.2.3 From b44123b4a3dcad4664d3a0f72c011ffd4c9c4d93 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 16 Dec 2021 02:04:27 +0000 Subject: bpf: Add cgroup helpers bpf_{get,set}_retval to get/set syscall return value The helpers continue to use int for retval because all the hooks are int-returning rather than long-returning. The return value of bpf_set_retval is int for future-proofing, in case in the future there may be errors trying to set the retval. After the previous patch, if a program rejects a syscall by returning 0, an -EPERM will be generated no matter if the retval is already set to -err. This patch change it being forced only if retval is not -err. This is because we want to support, for example, invoking bpf_set_retval(-EINVAL) and return 0, and have the syscall return value be -EINVAL not -EPERM. For BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY, the prior behavior is that, if the return value is NET_XMIT_DROP, the packet is silently dropped. We preserve this behavior for backward compatibility reasons, so even if an errno is set, the errno does not return to caller. However, setting a non-err to retval cannot propagate so this is not allowed and we return a -EFAULT in that case. Signed-off-by: YiFei Zhu Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/b4013fd5d16bed0b01977c1fafdeae12e1de61fb.1639619851.git.zhuyifei@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9c96c21330a..fe2272defcd9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5033,6 +5033,22 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5221,6 +5237,8 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From c2f2cdbeffda7b153c19e0f3d73149c41026c0db Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:52 +0100 Subject: bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the ebpf program Introduce BPF_F_XDP_HAS_FRAGS and the related field in bpf_prog_aux in order to notify the driver the loaded program support xdp frags. Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/db2e8075b7032a356003f407d1b0deb99adaa0ed.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fe2272defcd9..945649c67e03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1113,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * -- cgit v1.2.3 From 0165cc817075cf701e4289838f1d925ff1911b3e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:54 +0100 Subject: bpf: introduce bpf_xdp_get_buff_len helper Introduce bpf_xdp_get_buff_len helper in order to return the xdp buffer total size (linear and paged area) Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/aac9ac3504c84026cf66a3c71b7c5ae89bc991be.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 945649c67e03..5a28772063f6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5054,6 +5054,12 @@ union bpf_attr { * This helper is currently supported by cgroup programs only. * Return * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5244,6 +5250,7 @@ union bpf_attr { FN(get_func_arg_cnt), \ FN(get_retval), \ FN(set_retval), \ + FN(xdp_get_buff_len), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3f364222d032eea6b245780e845ad213dab28cdd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:10:03 +0100 Subject: net: xdp: introduce bpf_xdp_pointer utility routine Similar to skb_header_pointer, introduce bpf_xdp_pointer utility routine to return a pointer to a given position in the xdp_buff if the requested area (offset + len) is contained in a contiguous memory area otherwise it will be copied in a bounce buffer provided by the caller. Similar to the tc counterpart, introduce the two following xdp helpers: - bpf_xdp_load_bytes - bpf_xdp_store_bytes Reviewed-by: Eelco Chaudron Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jakub Kicinski Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/ab285c1efdd5b7a9d361348b1e7d3ef49f6382b3.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5a28772063f6..16a7574292a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5060,6 +5060,22 @@ union bpf_attr { * Get the total size of a given xdp buff (linear and paged area) * Return * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5251,6 +5267,8 @@ union bpf_attr { FN(get_retval), \ FN(set_retval), \ FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 376040e47334c6dc6a939a32197acceb00fe4acf Mon Sep 17 00:00:00 2001 From: Kenny Yu Date: Mon, 24 Jan 2022 10:54:01 -0800 Subject: bpf: Add bpf_copy_from_user_task() helper This adds a helper for bpf programs to read the memory of other tasks. As an example use case at Meta, we are using a bpf task iterator program and this new helper to print C++ async stack traces for all threads of a given process. Signed-off-by: Kenny Yu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220124185403.468466-3-kennyyu@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 16a7574292a5..4a2f7041ebae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From f23653fe64479d96910bfda2b700b1af17c991ac Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 26 Jan 2022 09:22:54 +0000 Subject: tty: Partially revert the removal of the Cyclades public API Fix a user API regression introduced with commit f76edd8f7ce0 ("tty: cyclades, remove this orphan"), which removed a part of the API and caused compilation errors for user programs using said part, such as GCC 9 in its libsanitizer component[1]: .../libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc:160:10: fatal error: linux/cyclades.h: No such file or directory 160 | #include | ^~~~~~~~~~~~~~~~~~ compilation terminated. make[4]: *** [Makefile:664: sanitizer_platform_limits_posix.lo] Error 1 As the absolute minimum required bring `struct cyclades_monitor' and ioctl numbers back then so as to make the library build again. Add a preprocessor warning as to the obsolescence of the features provided. References: [1] GCC PR sanitizer/100379, "cyclades.h is removed from linux kernel header files", Fixes: f76edd8f7ce0 ("tty: cyclades, remove this orphan") Cc: stable@vger.kernel.org # v5.13+ Reviewed-by: Christoph Hellwig Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.20.2201260733430.11348@tpp.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/cyclades.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/uapi/linux/cyclades.h (limited to 'include/uapi') diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h new file mode 100644 index 000000000000..6225c5aebe06 --- /dev/null +++ b/include/uapi/linux/cyclades.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_CYCLADES_H +#define _UAPI_LINUX_CYCLADES_H + +#warning "Support for features provided by this header has been removed" +#warning "Please consider updating your code" + +struct cyclades_monitor { + unsigned long int_count; + unsigned long char_count; + unsigned long char_max; + unsigned long char_last; +}; + +#define CYGETMON 0x435901 +#define CYGETTHRESH 0x435902 +#define CYSETTHRESH 0x435903 +#define CYGETDEFTHRESH 0x435904 +#define CYSETDEFTHRESH 0x435905 +#define CYGETTIMEOUT 0x435906 +#define CYSETTIMEOUT 0x435907 +#define CYGETDEFTIMEOUT 0x435908 +#define CYSETDEFTIMEOUT 0x435909 +#define CYSETRFLOW 0x43590a +#define CYGETRFLOW 0x43590b +#define CYSETRTSDTR_INV 0x43590c +#define CYGETRTSDTR_INV 0x43590d +#define CYZSETPOLLCYCLE 0x43590e +#define CYZGETPOLLCYCLE 0x43590f +#define CYGETCD1400VER 0x435910 +#define CYSETWAIT 0x435912 +#define CYGETWAIT 0x435913 + +#endif /* _UAPI_LINUX_CYCLADES_H */ -- cgit v1.2.3 From 8cda7a4f96e435be2fd074009d69521d973d7d31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Jan 2022 17:57:33 -0500 Subject: drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates Add a new CTX ioctl operation to set stable pstates for profiling. When creating traces for tools like RGP or using SPM or doing performance profiling, it's required to enable a special stable profiling power state on the GPU. These profiling states set fixed clocks and disable certain other power features like powergating which may impact the results. Historically, these profiling pstates were enabled via sysfs, but this adds an interface to enable it via the CTX ioctl from the application. Since the power state is global only one application can set it at a time, so if multiple applications try and use it only the first will get it, the ioctl will return -EBUSY for others. The sysfs interface will override whatever has been set by this interface. Mesa MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 v2: don't default r = 0; v3: rebase on Evan's PM cleanup Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be4f9111f478..76b580d10a52 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -206,6 +206,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -238,10 +240,18 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_PRIORITY_HIGH 512 #define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; - /** For future use, no flags defined so far */ + /** Flags */ __u32 flags; __u32 ctx_id; /** AMDGPU_CTX_PRIORITY_* */ @@ -262,6 +272,11 @@ union drm_amdgpu_ctx_out { /** Reset status since the last call of the ioctl. */ __u32 reset_status; } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; }; union drm_amdgpu_ctx { -- cgit v1.2.3 From dd6e631220181162478984d2d46dd979e04d8e75 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:49:45 -0500 Subject: KVM: x86: add system attribute to retrieve full set of supported xsave states Because KVM_GET_SUPPORTED_CPUID is meant to be passed (by simple-minded VMMs) to KVM_SET_CPUID2, it cannot include any dynamic xsave states that have not been enabled. Probing those, for example so that they can be passed to ARCH_REQ_XCOMP_GUEST_PERM, requires a new ioctl or arch_prctl. The latter is in fact worse, even though that is what the rest of the API uses, because it would require supported_xcr0 to be moved from the KVM module to the kernel just for this use. In addition, the value would be nonsensical (or an error would have to be returned) until the KVM module is loaded in. Therefore, to limit the growth of system ioctls, add a /dev/kvm variant of KVM_{GET,HAS}_DEVICE_ATTR, and implement it in x86 with just one group (0) and attribute (KVM_X86_XCOMP_GUEST_SUPP). Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9563d294f181..b46bcdb0cab1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 9690ae60429020f38e4aa2540c306f27eb021bc0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jan 2022 10:42:59 -0800 Subject: ethtool: add header/data split indication For applications running on a mix of platforms it's useful to have a clear indication whether host's NIC supports the geometry requirements of TCP zero-copy. TCP zero-copy Rx requires data to be neatly placed into memory pages. Most NICs can't do that. This patch is adding GET support only, since the NICs I work with either always have the feature enabled or enable it whenever MTU is set to jumbo. In other words I don't need SET. But adding set should be trivial. (The only note on SET is that we will likely want the setting to be "sticky" and use 0 / `unknown` to reset it back to driver default.) Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index cca6e474a085..417d4280d7b5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -318,6 +318,12 @@ enum { /* RINGS */ +enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + enum { ETHTOOL_A_RINGS_UNSPEC, ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ @@ -330,6 +336,7 @@ enum { ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From b2a90f4fcb146d0e033203ab646f0fd22cfa947f Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Jan 2022 11:20:22 +0100 Subject: media: lirc: remove unused lirc features These features have never been implemented by any lirc driver, including staging or out of tree drivers. The ioctls for these feaures were removed in commit d55f09abe24b ("[media] lirc.h: remove several unused ioctls"). So, we can safely remove them. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 9919f2062b14..a1f9c26ea537 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -72,11 +72,9 @@ #define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) #define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 #define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 #define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 #define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 #define LIRC_CAN_MEASURE_CARRIER 0x02000000 #define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 @@ -84,8 +82,6 @@ #define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) #define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - /*** IOCTL commands for lirc driver ***/ #define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) -- cgit v1.2.3 From 68a99f6a0ebfe9101ea79ba5af1c407a5ad4f629 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 15 Jan 2022 11:19:11 +0100 Subject: media: lirc: report ir receiver overflow If the driver reports that the hardware had an overflow, report this to userspace. It would be nice to know when this happens, and not just get a long space. This change has been tested with lircd, ir-ctl, and ir-keytable. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index a1f9c26ea537..21c69a6a100d 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -16,14 +16,16 @@ #define LIRC_MODE2_PULSE 0x01000000 #define LIRC_MODE2_FREQUENCY 0x02000000 #define LIRC_MODE2_TIMEOUT 0x03000000 +#define LIRC_MODE2_OVERFLOW 0x04000000 #define LIRC_VALUE_MASK 0x00FFFFFF #define LIRC_MODE2_MASK 0xFF000000 -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +#define LIRC_SPACE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) +#define LIRC_PULSE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) +#define LIRC_FREQUENCY(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) +#define LIRC_TIMEOUT(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +#define LIRC_OVERFLOW(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_OVERFLOW) #define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) #define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) @@ -32,6 +34,7 @@ #define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) #define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) #define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) +#define LIRC_IS_OVERFLOW(val) (LIRC_MODE2(val) == LIRC_MODE2_OVERFLOW) /* used heavily by lirc userspace */ #define lirc_t int -- cgit v1.2.3 From f6c6804c43fa18d3cee64b55490dfbd3bef1363a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 28 Jan 2022 15:40:25 +0000 Subject: kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h This way we can more easily find the next free IOCTL number when adding new IOCTLs. Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") Signed-off-by: Janosch Frank Message-Id: <20220128154025.102666-1-frankja@linux.ibm.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b46bcdb0cab1..5191b57e1562 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1624,9 +1624,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -2048,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From e187013abeb4c2a7ec8a4bb978844c7e92a1a6ec Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Mon, 31 Jan 2022 16:31:21 +0300 Subject: txhash: Make rethinking txhash behavior configurable via sysctl Add a per ns sysctl that controls the txhash rethink behavior: net.core.txrehash. When enabled, the same behavior is retained, when disabled, rethink is not performed. Sysctl is enabled by default. Signed-off-by: Akhmat Karakotov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/socket.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index eb0a9a5b6e71..0accd6102ece 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,4 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DISABLED 0 +#define SOCK_TXREHASH_ENABLED 1 + #endif /* _UAPI_LINUX_SOCKET_H */ -- cgit v1.2.3 From 26859240e4ee701e0379f08634957adaff67e43a Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Mon, 31 Jan 2022 16:31:22 +0300 Subject: txhash: Add socket option to control TX hash rethink behavior Add the SO_TXREHASH socket option to control hash rethink behavior per socket. When default mode is set, sockets disable rehash at initialization and use sysctl option when entering listen state. setsockopt() overrides default behavior. Signed-off-by: Akhmat Karakotov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/socket.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c77a1313b3b0..467ca2f28760 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -128,6 +128,8 @@ #define SO_RESERVE_MEM 73 +#define SO_TXREHASH 74 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index 0accd6102ece..51d6bb2f6765 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DEFAULT ((u8)-1) #define SOCK_TXREHASH_DISABLED 0 #define SOCK_TXREHASH_ENABLED 1 -- cgit v1.2.3 From 4421a582718ab81608d8486734c18083b822390d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 30 Jan 2022 12:55:17 +0100 Subject: bpf: Make dst_port field in struct bpf_sock 16-bit wide Menglong Dong reports that the documentation for the dst_port field in struct bpf_sock is inaccurate and confusing. From the BPF program PoV, the field is a zero-padded 16-bit integer in network byte order. The value appears to the BPF user as if laid out in memory as so: offsetof(struct bpf_sock, dst_port) + 0 + 8 +16 0x00 +24 0x00 32-, 16-, and 8-bit wide loads from the field are all allowed, but only if the offset into the field is 0. 32-bit wide loads from dst_port are especially confusing. The loaded value, after converting to host byte order with bpf_ntohl(dst_port), contains the port number in the upper 16-bits. Remove the confusion by splitting the field into two 16-bit fields. For backward compatibility, allow 32-bit wide loads from offsetof(struct bpf_sock, dst_port). While at it, allow loads 8-bit loads at offset [0] and [1] from dst_port. Reported-by: Menglong Dong Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220130115518.213259-2-jakub@cloudflare.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a2f7041ebae..a7f0ddedac1f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5574,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; -- cgit v1.2.3 From bfdf4e6208051ed7165b2e92035b4bf11f43eb63 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 27 Jan 2022 10:27:20 -0500 Subject: rseq: Remove broken uapi field layout on 32-bit little endian The rseq rseq_cs.ptr.{ptr32,padding} uapi endianness handling is entirely wrong on 32-bit little endian: a preprocessor logic mistake wrongly uses the big endian field layout on 32-bit little endian architectures. Fortunately, those ptr32 accessors were never used within the kernel, and only meant as a convenience for user-space. Remove those and replace the whole rseq_cs union by a __u64 type, as this is the only thing really needed to express the ABI. Document how 32-bit architectures are meant to interact with this field. Fixes: ec9c82e03a74 ("rseq: uapi: Declare rseq_cs field as union, update includes") Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220127152720.25898-1-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 9a402fdb60e9..77ee207623a9 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -105,23 +105,11 @@ struct rseq { * Read and set by the kernel. Set by user-space with single-copy * atomicity semantics. This field should only be updated by the * thread which registered this data structure. Aligned on 64-bit. + * + * 32-bit architectures should update the low order bits of the + * rseq_cs field, leaving the high order bits initialized to 0. */ - union { - __u64 ptr64; -#ifdef __LP64__ - __u64 ptr; -#else - struct { -#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) - __u32 padding; /* Initialized to zero. */ - __u32 ptr32; -#else /* LITTLE */ - __u32 ptr32; - __u32 padding; /* Initialized to zero. */ -#endif /* ENDIAN */ - } ptr; -#endif - } rseq_cs; + __u64 rseq_cs; /* * Restartable sequences flags field. -- cgit v1.2.3 From ddecd22878601a606d160680fa85802b75d92eb6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 11:34:07 +0100 Subject: perf: uapi: Document perf_event_attr::sig_data truncation on 32 bit architectures Due to the alignment requirements of siginfo_t, as described in 3ddb3fd8cdb0 ("signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures"), siginfo_t::si_perf_data is limited to an unsigned long. However, perf_event_attr::sig_data is an u64, to avoid having to deal with compat conversions. Due to being an u64, it may not immediately be clear to users that sig_data is truncated on 32 bit architectures. Add a comment to explicitly point this out, and hopefully help some users save time by not having to deduce themselves what's happening. Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20220131103407.1971678-3-elver@google.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..82858b697c05 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -465,6 +465,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; -- cgit v1.2.3 From c86d86131ab75696fc52d98571148842e067d620 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 2 Feb 2022 06:09:04 +0300 Subject: Partially revert "net/smc: Add netlink net namespace support" The change of sizeof(struct smc_diag_linkinfo) by commit 79d39fc503b4 ("net/smc: Add netlink net namespace support") introduced an ABI regression: since struct smc_diag_lgrinfo contains an object of type "struct smc_diag_linkinfo", offset of all subsequent members of struct smc_diag_lgrinfo was changed by that change. As result, applications compiled with the old version of struct smc_diag_linkinfo will receive garbage in struct smc_diag_lgrinfo.role if the kernel implements this new version of struct smc_diag_linkinfo. Fix this regression by reverting the part of commit 79d39fc503b4 that changes struct smc_diag_linkinfo. After all, there is SMC_GEN_NETLINK interface which is good enough, so there is probably no need to touch the smc_diag ABI in the first place. Fixes: 79d39fc503b4 ("net/smc: Add netlink net namespace support") Signed-off-by: Dmitry V. Levin Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220202030904.GA9742@altlinux.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/smc_diag.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { -- cgit v1.2.3 From 164666fa66669d437bdcc8d5f1744a2aee73be41 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Mon, 31 Jan 2022 12:23:07 -0500 Subject: Improve docs for IOCTL_GNTDEV_MAP_GRANT_REF --------------cKY3Ggs6VDUCSn4I6iN78sHA Content-Type: multipart/mixed; boundary="------------g0T69ASidFiPhh4eOY4XzIg1" --------------g0T69ASidFiPhh4eOY4XzIg1 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable The current implementation of gntdev guarantees that the first call to IOCTL_GNTDEV_MAP_GRANT_REF will set @index to 0. This is required to use gntdev for Wayland, which is a future desire of Qubes OS. Additionally, requesting zero grants results in an error, but this was not documented either. Document both of these. Signed-off-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/f66c5a4e-2034-00b5-a635-6983bd999c07@gmail.com Signed-off-by: Juergen Gross --- include/uapi/xen/gntdev.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index 9ac5515b9bc2..7a7145395c09 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref { /* * Inserts the grant references into the mapping table of an instance * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. + * until mmap() is called with @index as the offset. @index should be + * considered opaque to userspace, with one exception: if no grant + * references have ever been inserted into the mapping table of this + * instance, @index will be set to 0. This is necessary to use gntdev + * with userspace APIs that expect a file descriptor that can be + * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this + * ioctl will fail. */ #define IOCTL_GNTDEV_MAP_GRANT_REF \ _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) -- cgit v1.2.3 From e4b1eb24ce5a696ef7229f9926ff34d7502f0582 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 27 Jan 2022 11:34:53 -0800 Subject: thermal: netlink: Add a new event to notify CPU capabilities change Add a new netlink event to notify change in CPU capabilities in terms of performance and efficiency. Firmware may change CPU capabilities as a result of thermal events in the system or to account for changes in the TDP (thermal design power) level. This notification type will allow user space to avoid running workloads on certain CPUs or proactively adjust power limits to avoid future events. The netlink message consists of a nested attribute (THERMAL_GENL_ATTR_CPU_CAPABILITY) with three attributes: * THERMAL_GENL_ATTR_CPU_CAPABILITY_ID (type u32): -- logical CPU number * THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE (type u32): -- Scaled performance from 0-1023 * THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY (type u32): -- Scaled efficiency from 0-1023 Reviewed-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 9aa2fedfa309..fc78bf3aead7 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -44,7 +44,10 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CDEV_MAX_STATE, THERMAL_GENL_ATTR_CDEV_NAME, THERMAL_GENL_ATTR_GOV_NAME, - + THERMAL_GENL_ATTR_CPU_CAPABILITY, + THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -71,6 +74,7 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ + THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) -- cgit v1.2.3 From d1ca60efc53d665cf89ed847a14a510a81770b81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Feb 2022 12:00:56 +0100 Subject: netfilter: ctnetlink: disable helper autoassign When userspace, e.g. conntrackd, inserts an entry with a specified helper, its possible that the helper is lost immediately after its added: ctnetlink_create_conntrack -> nf_ct_helper_ext_add + assign helper -> ctnetlink_setup_nat -> ctnetlink_parse_nat_setup -> parse_nat_setup -> nfnetlink_parse_nat_setup -> nf_nat_setup_info -> nf_conntrack_alter_reply -> __nf_ct_try_assign_helper ... and __nf_ct_try_assign_helper will zero the helper again. Set IPS_HELPER bit to bypass auto-assign logic, its unwanted, just like when helper is assigned via ruleset. Dropped old 'not strictly necessary' comment, it referred to use of rcu_assign_pointer() before it got replaced by RCU_INIT_POINTER(). NB: Fixes tag intentionally incorrect, this extends the referenced commit, but this change won't build without IPS_HELPER introduced there. Fixes: 6714cf5465d280 ("netfilter: nf_conntrack: fix explicit helper attachment and NAT") Reported-by: Pham Thanh Tuyen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15..26071021e986 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), -- cgit v1.2.3 From 8b5413647262dda8d8d0e07e14ea1de9ac7cf0b2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 17 Jan 2022 21:56:13 +0100 Subject: netfilter: nfqueue: enable to get skb->priority This info could be useful to improve traffic analysis. Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index aed90c4df0c8..ef7c97f21a15 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -61,6 +61,7 @@ enum nfqnl_attr_type { NFQA_SECCTX, /* security context string */ NFQA_VLAN, /* nested attribute: packet vlan info */ NFQA_L2HDR, /* full L2 header */ + NFQA_PRIORITY, /* skb->priority */ __NFQA_MAX }; -- cgit v1.2.3 From be847673cfffce8bb6e9ed6ae186081280c58831 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 2 Feb 2022 15:25:53 +0100 Subject: uapi: ioam: Insertion frequency Add the insertion frequency uapi for IOAM lwtunnels. Signed-off-by: Justin Iurman Signed-off-by: Jakub Kicinski --- include/uapi/linux/ioam6_iptunnel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 829ffdfcacca..38f6a8fdfd34 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -41,6 +41,15 @@ enum { /* IOAM Trace Header */ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + /* Insertion frequency: + * "k over n" packets (0 < k <= n) + * [0.0001% ... 100%] + */ +#define IOAM6_IPTUNNEL_FREQ_MIN 1 +#define IOAM6_IPTUNNEL_FREQ_MAX 1000000 + IOAM6_IPTUNNEL_FREQ_K, /* u32 */ + IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + __IOAM6_IPTUNNEL_MAX, }; -- cgit v1.2.3 From a3574119826d9a4ef807fb973cf5150c3b90da43 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 3 Feb 2022 13:38:06 +0000 Subject: drm: document struct drm_mode_fb_cmd2 Follow-up for the DRM_IOCTL_MODE_GETFB2 docs. v2: (Daniel Stone) - Replace fourcc.org with drm_fourcc.h because this is the authoritative source and the website may have mismatches. - Drop assumption that offsets will generally be 0. - Mention that unused entries must be zero'ed out. v3: (Pekka) - Mention that a handle can be re-used - Add unit for pitches/offsets Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Pekka Paalanen Cc: Daniel Stone Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20220203133753.261507-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 88 ++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index e1e351682872..0a0d56a6158e 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -663,41 +663,73 @@ struct drm_mode_fb_cmd { #define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ #define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifer[] */ +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order is dictated by the + * format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8 bit Y samples followed by an + * interleaved U/V plane containing 8 bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ __u32 fb_id; + /** @width: Width of the frame-buffer. */ __u32 width; + /** @height: Height of the frame-buffer. */ __u32 height; - __u32 pixel_format; /* fourcc code from drm_fourcc.h */ - __u32 flags; /* see above flags */ + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; - /* - * In case of planar formats, this ioctl allows up to 4 - * buffer objects with offsets and pitches per plane. - * The pitch and offset order is dictated by the fourcc, - * e.g. NV12 (https://fourcc.org/yuv.php#NV12) is described as: - * - * YUV 4:2:0 image with a plane of 8 bit Y samples - * followed by an interleaved U/V plane containing - * 8 bit 2x2 subsampled colour difference samples. - * - * So it would consist of Y as offsets[0] and UV as - * offsets[1]. Note that offsets[0] will generally - * be 0 (but this is not required). - * - * To accommodate tiled, compressed, etc formats, a - * modifier can be specified. The default value of zero - * indicates "native" format as specified by the fourcc. - * Vendor specific modifier token. Note that even though - * it looks like we have a modifier per-plane, we in fact - * do not. The modifier for each plane must be identical. - * Thus all combinations of different data layouts for - * multi plane formats must be enumerated as separate - * modifiers. + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. */ __u32 handles[4]; - __u32 pitches[4]; /* pitch for each plane */ - __u32 offsets[4]; /* offset of each plane */ - __u64 modifier[4]; /* ie, tiling, compress */ + /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ + __u32 pitches[4]; + /** @offsets: Offset into the buffer in bytes, one per plane. */ + __u32 offsets[4]; + /** + * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` + * constants in ``drm_fourcc.h``. All planes must use the same + * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. + */ + __u64 modifier[4]; }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 -- cgit v1.2.3 From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 24 Aug 2021 16:13:41 -0400 Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on same or a remote machine but expects the processes that have a device file (e.g. GPU) associated with them, provide necessary driver support to assist CRIU and its extensible plugin interface. Thus, In order to support the Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute Kernel driver, needs to provide a set of new APIs that provide necessary VRAM metadata and its contents to a userspace component (CRIU plugin) that can store it in form of image files. This introduces some new ioctls which will be used to checkpoint-Restore any KFD bound user process. KFD only allows ioctl calls from the same process that opened the KFD file descriptor. Since these ioctls are expected to be called from a KFD criu plugin which has elevated ptrace attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with the file descriptors so modify KFD to allow such calls. (API redesigned by David Yat Sin) Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..49429a6c42fc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3. UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. + * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. + * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- cgit v1.2.3 From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 18 Jan 2022 01:47:58 -0500 Subject: drm/amdkfd: Bump up KFD API version for CRIU - Change KFD minor version to 7 for CRIU Proposed userspace changes: https://github.com/RadeonOpenCompute/criu Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 49429a6c42fc..e6a56c146920 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From 7c76ecd9c99b6e9a771d813ab1aa7fa428b3ade1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Feb 2022 16:14:32 +0200 Subject: xfrm: enforce validity of offload input flags struct xfrm_user_offload has flags variable that received user input, but kernel didn't check if valid bits were provided. It caused a situation where not sanitized input was forwarded directly to the drivers. For example, XFRM_OFFLOAD_IPV6 define that was exposed, was used by strongswan, but not implemented in the kernel at all. As a solution, check and sanitize input flags to forward XFRM_OFFLOAD_INBOUND to the drivers. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 4e29d7851890..65e13a099b1a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -511,6 +511,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. + * + * So leave it here to make sure that it won't be reused by mistake. + */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 -- cgit v1.2.3 From 63ed1aab3d40aa61aaa66819bdce9377ac7f40fa Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Wed, 9 Feb 2022 12:05:57 +0800 Subject: mctp: Add SIOCMCTP{ALLOC,DROP}TAG ioctls for tag control This change adds a couple of new ioctls for mctp sockets: SIOCMCTPALLOCTAG and SIOCMCTPDROPTAG. These ioctls provide facilities for explicit allocation / release of tags, overriding the automatic allocate-on-send/release-on-reply and timeout behaviours. This allows userspace more control over messages that may not fit a simple request/response model. In order to indicate a pre-allocated tag to the sendmsg() syscall, we introduce a new flag to the struct sockaddr_mctp.smctp_tag value: MCTP_TAG_PREALLOC. Additional changes from Jeremy Kerr . Contains a fix that was: Reported-by: kernel test robot Signed-off-by: Matt Johnston Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 07b0318716fc..154ab56651f1 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -44,7 +44,25 @@ struct sockaddr_mctp_ext { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_TAG_PREALLOC 0x10 #define MCTP_OPT_ADDR_EXT 1 +#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) +#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) + +struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + + /* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set). + */ + __u8 tag; + __u16 flags; +}; + #endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 9a69e2b385f443f244a7e8b8bcafe5ccfb0866b4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 9 Feb 2022 19:43:32 +0100 Subject: bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide remote_port is another case of a BPF context field documented as a 32-bit value in network byte order for which the BPF context access converter generates a load of a zero-padded 16-bit integer in network byte order. First such case was dst_port in bpf_sock which got addressed in commit 4421a582718a ("bpf: Make dst_port field in struct bpf_sock 16-bit wide"). Loading 4-bytes from the remote_port offset and converting the value with bpf_ntohl() leads to surprising results, as the expected value is shifted by 16 bits. Reduce the confusion by splitting the field in two - a 16-bit field holding a big-endian integer, and a 16-bit zero-padding anonymous field that follows it. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220209184333.654927-2-jakub@cloudflare.com --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a7f0ddedac1f..afe3d0d7f5f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6453,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ -- cgit v1.2.3 From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 4 Feb 2022 11:58:32 -0500 Subject: drm/amdkfd: Remove unused old debugger implementation Cleanup the kfd code by removing the unused old debugger implementation. The address watch was only ever implemented in the upstream driver for GFXv7 (Kaveri). The user mode tools runtime using this API was never open-sourced. Work on the old debugger prototype that used this API has been discontinued years ago. Only a small piece of resetting wavefronts is kept and is moved to kfd_device_queue_manager.c. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e6a56c146920..6e4268f5e482 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ -- cgit v1.2.3 From 5cad527d5ffa9a1c4731bb9c97d2ee93f8960d50 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Feb 2022 14:08:38 +0800 Subject: net: drop_monitor: support drop reason In the commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") drop reason is introduced to the tracepoint of kfree_skb. Therefore, drop_monitor is able to report the drop reason to users by netlink. The drop reasons are reported as string to users, which is exactly the same as what we do when reporting it to ftrace. Signed-off-by: Menglong Dong Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220209060838.55513-1-imagedong@tencent.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_dropmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 66048cc5d7b3..1bbea8f0681e 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -93,6 +93,7 @@ enum net_dm_attr { NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ + NET_DM_ATTR_REASON, /* string */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 -- cgit v1.2.3 From 7f5a08c79df35e68f1a43033450c5050f12bc155 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 18 Jan 2022 12:43:15 -0800 Subject: user_events: Add minimal support for trace_event into ftrace Minimal support for interacting with dynamic events, trace_event and ftrace. Core outline of flow between user process, ioctl and trace_event APIs. User mode processes that wish to use trace events to get data into ftrace, perf, eBPF, etc are limited to uprobes today. The user events features enables an ABI for user mode processes to create and write to trace events that are isolated from kernel level trace events. This enables a faster path for tracing from user mode data as well as opens managed code to participate in trace events, where stub locations are dynamic. User processes often want to trace only when it's useful. To enable this a set of pages are mapped into the user process space that indicate the current state of the user events that have been registered. User processes can check if their event is hooked to a trace/probe, and if it is, emit the event data out via the write() syscall. Two new files are introduced into tracefs to accomplish this: user_events_status - This file is mmap'd into participating user mode processes to indicate event status. user_events_data - This file is opened and register/delete ioctl's are issued to create/open/delete trace events that can be used for tracing. The typical scenario is on process start to mmap user_events_status. Processes then register the events they plan to use via the REG ioctl. The ioctl reads and updates the passed in user_reg struct. The status_index of the struct is used to know the byte in the status page to check for that event. The write_index of the struct is used to describe that event when writing out to the fd that was used for the ioctl call. The data must always include this index first when writing out data for an event. Data can be written either by write() or by writev(). For example, in memory: int index; char data[]; Psuedo code example of typical usage: struct user_reg reg; int page_fd = open("user_events_status", O_RDWR); char *page_data = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, page_fd, 0); close(page_fd); int data_fd = open("user_events_data", O_RDWR); reg.size = sizeof(reg); reg.name_args = (__u64)"test"; ioctl(data_fd, DIAG_IOCSREG, ®); int status_id = reg.status_index; int write_id = reg.write_index; struct iovec io[2]; io[0].iov_base = &write_id; io[0].iov_len = sizeof(write_id); io[1].iov_base = payload; io[1].iov_len = sizeof(payload); if (page_data[status_id]) writev(data_fd, io, 2); User events are also exposed via the dynamic_events tracefs file for both create and delete. Current status is exposed via the user_events_status tracefs file. Simple example to register a user event via dynamic_events: echo u:test >> dynamic_events cat dynamic_events u:test If an event is hooked to a probe, the probe hooked shows up: echo 1 > events/user_events/test/enable cat user_events_status 1:test # Used by ftrace Active: 1 Busy: 1 Max: 4096 If an event is not hooked to a probe, no probe status shows up: echo 0 > events/user_events/test/enable cat user_events_status 1:test Active: 1 Busy: 0 Max: 4096 Users can describe the trace event format via the following format: name[:FLAG1[,FLAG2...] [field1[;field2...]] Each field has the following format: type name Example for char array with a size of 20 named msg: echo 'u:detailed char[20] msg' >> dynamic_events cat dynamic_events u:detailed char[20] msg Data offsets are based on the data written out via write() and will be updated to reflect the correct offset in the trace_event fields. For dynamic data it is recommended to use the new __rel_loc data type. This type will be the same as __data_loc, but the offset is relative to this entry. This allows user_events to not worry about what common fields are being inserted before the data. The above format is valid for both the ioctl and the dynamic_events file. Link: https://lkml.kernel.org/r/20220118204326.2169-2-beaub@linux.microsoft.com Acked-by: Masami Hiramatsu Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 116 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 include/uapi/linux/user_events.h (limited to 'include/uapi') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h new file mode 100644 index 000000000000..e570840571e1 --- /dev/null +++ b/include/uapi/linux/user_events.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, Microsoft Corporation. + * + * Authors: + * Beau Belgrave + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ +#define EVENT_BIT_FTRACE 0 +#define EVENT_BIT_PERF 1 +#define EVENT_BIT_OTHER 7 + +#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) +#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) +#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ +#define FLAG_BPF_ITER (1 << 0) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation. + */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Byte index of the event within the status page */ + __u32 status_index; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +}; + +#define DIAG_IOC_MAGIC '*' + +/* Requests to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) + +/* Requests to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) + +/* Data type that was passed to the BPF program */ +enum { + /* Data resides in kernel space */ + USER_BPF_DATA_KERNEL, + + /* Data resides in user space */ + USER_BPF_DATA_USER, + + /* Data is a pointer to a user_bpf_iter structure */ + USER_BPF_DATA_ITER, +}; + +/* + * Describes an iovec iterator that BPF programs can use to access data for + * a given user_event write() / writev() call. + */ +struct user_bpf_iter { + + /* Offset of the data within the first iovec */ + __u32 iov_offset; + + /* Number of iovec structures */ + __u32 nr_segs; + + /* Pointer to iovec structures */ + const struct iovec *iov; +}; + +/* Context that BPF programs receive when attached to a user_event */ +struct user_bpf_context { + + /* Data type being passed (see union below) */ + __u32 data_type; + + /* Length of the data */ + __u32 data_len; + + /* Pointer to data, varies by data type */ + union { + /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ + void *kdata; + + /* User data (data_type == USER_BPF_DATA_USER) */ + void *udata; + + /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ + struct user_bpf_iter *iter; + }; +}; + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From a6a6fe27bab48f0d09a64b051e7bde432fcae081 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 10 Feb 2022 17:11:37 +0800 Subject: net/smc: Dynamic control handshake limitation by socket options This patch aims to add dynamic control for SMC handshake limitation for every smc sockets, in production environment, it is possible for the same applications to handle different service types, and may have different opinion on SMC handshake limitation. This patch try socket options to complete it, since we don't have socket option level for SMC yet, which requires us to implement it at the same time. This patch does the following: - add new socket option level: SOL_SMC. - add new SMC socket option: SMC_LIMIT_HS. - provide getter/setter for SMC socket options. Link: https://lore.kernel.org/all/20f504f961e1a803f85d64229ad84260434203bd.1644323503.git.alibuda@linux.alibaba.com/ Signed-off-by: D. Wythe Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 6c2874fd2c00..343e7450c3a3 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -284,4 +284,8 @@ enum { __SMC_NLA_SEID_TABLE_MAX, SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 }; + +/* SMC socket options */ +#define SMC_LIMIT_HS 1 /* constraint on smc handshake */ + #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From f9496b7c1b48ce02cd17a3ee88b1e049c689a222 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 10 Feb 2022 17:11:38 +0800 Subject: net/smc: Add global configure for handshake limitation by netlink Although we can control SMC handshake limitation through socket options, which means that applications who need it must modify their code. It's quite troublesome for many existing applications. This patch modifies the global default value of SMC handshake limitation through netlink, providing a way to put constraint on handshake without modifies any code for applications. Suggested-by: Tony Lu Signed-off-by: D. Wythe Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 343e7450c3a3..693f549f6966 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -59,6 +59,9 @@ enum { SMC_NETLINK_DUMP_SEID, SMC_NETLINK_ENABLE_SEID, SMC_NETLINK_DISABLE_SEID, + SMC_NETLINK_DUMP_HS_LIMITATION, + SMC_NETLINK_ENABLE_HS_LIMITATION, + SMC_NETLINK_DISABLE_HS_LIMITATION, }; /* SMC_GENL_FAMILY top level attributes */ @@ -285,6 +288,14 @@ enum { SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 }; +/* SMC_NETLINK_HS_LIMITATION attributes */ +enum { + SMC_NLA_HS_LIMITATION_UNSPEC, + SMC_NLA_HS_LIMITATION_ENABLED, /* u8 */ + __SMC_NLA_HS_LIMITATION_MAX, + SMC_NLA_HS_LIMITATION_MAX = __SMC_NLA_HS_LIMITATION_MAX - 1 +}; + /* SMC socket options */ #define SMC_LIMIT_HS 1 /* constraint on smc handshake */ -- cgit v1.2.3 From e9e9feebcbc14b174fef862842f8cc9a388e1db3 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:10 +0100 Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL User space needs a mechanism to perform key checked accesses when emulating instructions. The key can be passed as an additional argument. Having an additional argument is flexible, as user space can pass the guest PSW's key, in order to make an access the same way the CPU would, or pass another key if necessary. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..4566f429db2c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -562,7 +562,10 @@ struct kvm_s390_mem_op { __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ union { - __u8 ar; /* the access register number */ + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* should be set to 0 */ }; @@ -575,6 +578,7 @@ struct kvm_s390_mem_op { /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) /* for KVM_INTERRUPT */ struct kvm_interrupt { -- cgit v1.2.3 From ef11c9463ae006302ce170a401854a48ea0532ca Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:11 +0100 Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access Channel I/O honors storage keys and is performed on absolute memory. For I/O emulation user space therefore needs to be able to do key checked accesses. The vm IOCTL supports read/write accesses, as well as checking if an access would succeed. Unlike relying on KVM_S390_GET_SKEYS for key checking would, the vm IOCTL performs the check in lockstep with the read or write, by, ultimately, mapping the access to move instructions that support key protection checking with a supplied key. Fetch and storage protection override are not applicable to absolute accesses and so are not applied as they are when using the vcpu memop. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4566f429db2c..4bc7623def87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -575,6 +575,8 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_LOGICAL_WRITE 1 #define KVM_S390_MEMOP_SIDA_READ 2 #define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -- cgit v1.2.3 From d004079edc166ff19605475211305923c708b4d5 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:13 +0100 Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that: * The vcpu MEM_OP IOCTL supports storage key checking. * The vm MEM_OP IOCTL exists. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4bc7623def87..08756eeea065 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1140,6 +1140,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_S390_MEM_OP_EXTENSION 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 5e35d0eb472b48ac9c8ef7017753b8a1f765aa01 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:14 +0100 Subject: KVM: s390: Update api documentation for memop ioctl Document all currently existing operations, flags and explain under which circumstances they are available. Document the recently introduced absolute operations and the storage key protection flag, as well as the existing SIDA operations. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-10-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 08756eeea065..dbc550bbd9fa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -567,7 +567,7 @@ struct kvm_s390_mem_op { __u8 key; /* access key, ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* should be set to 0 */ + __u8 reserved[32]; /* ignored */ }; }; /* types for kvm_s390_mem_op->op */ -- cgit v1.2.3 From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:53 +0000 Subject: elf: Introduce the ARM MTE ELF segment type Memory tags will be dumped in the core file as segments with their own type. Discussions with the binutils and the generic ABI community settled on using new definitions in the PT_*PROC space (and to be documented in the processor-specific ABIs). Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). Not included in this patch since there is no upstream support but the CHERI/BSD community will also reserve: #define PT_ARM_MEMTAG_CHERI (PT_LOPROC + 0x2) #define PT_RISCV_MEMTAG_CHERI (PT_LOPROC + 0x3) Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..fe8e5b74cb39 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * -- cgit v1.2.3 From 5cd5a8a3e2fb11b1c8a09f062c44c1e228ef987a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:53 +0100 Subject: cfg80211: Add data structures to capture EHT capabilities And advertise EHT capabilities to user space when supported. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.6fb70658529f.I2413a37c8f7d2d6d638038a3d95360a3fce0114d@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 195a238a322e..d305a8b8c536 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3766,6 +3766,14 @@ enum nl80211_mpath_info { * given for all 6 GHz band channels * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are * advertised on this band/for this iftype (binary) + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC: EHT MAC capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY: EHT PHY capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET: EHT supported NSS/MCS as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE: EHT PPE thresholds information as + * defined in EHT capabilities element * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ @@ -3779,6 +3787,10 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, /* keep last */ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, -- cgit v1.2.3 From 3743bec6120ae0748cb3fda6ff80a690117ef1f3 Mon Sep 17 00:00:00 2001 From: Jia Ding Date: Mon, 14 Feb 2022 17:29:54 +0100 Subject: cfg80211: Add support for EHT 320 MHz channel width Add 320 MHz support in the channel def and center frequency validation with compatible check. Signed-off-by: Jia Ding Co-authored-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Co-authored-by: Muna Sinada Signed-off-by: Muna Sinada Co-authored-by: Veerendranath Jakkam Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-5-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-1-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d305a8b8c536..9e05973f3f56 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4684,6 +4684,8 @@ enum nl80211_key_mode { * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel + * @NL80211_CHAN_WIDTH_320: 320 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well */ enum nl80211_chan_width { NL80211_CHAN_WIDTH_20_NOHT, @@ -4699,6 +4701,7 @@ enum nl80211_chan_width { NL80211_CHAN_WIDTH_4, NL80211_CHAN_WIDTH_8, NL80211_CHAN_WIDTH_16, + NL80211_CHAN_WIDTH_320, }; /** -- cgit v1.2.3 From cfb14110acf87b4db62e07ba08a80429f1749f40 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 14 Feb 2022 17:29:55 +0100 Subject: nl80211: add EHT MCS support Add support for reporting and calculating EHT bitrates. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-7-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 62 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9e05973f3f56..d0ba70ea5d04 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3392,6 +3392,56 @@ enum nl80211_he_ru_alloc { NL80211_RATE_INFO_HE_RU_ALLOC_2x996, }; +/** + * enum nl80211_eht_gi - EHT guard interval + * @NL80211_RATE_INFO_EHT_GI_0_8: 0.8 usec + * @NL80211_RATE_INFO_EHT_GI_1_6: 1.6 usec + * @NL80211_RATE_INFO_EHT_GI_3_2: 3.2 usec + */ +enum nl80211_eht_gi { + NL80211_RATE_INFO_EHT_GI_0_8, + NL80211_RATE_INFO_EHT_GI_1_6, + NL80211_RATE_INFO_EHT_GI_3_2, +}; + +/** + * enum nl80211_eht_ru_alloc - EHT RU allocation values + * @NL80211_RATE_INFO_EHT_RU_ALLOC_26: 26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52: 52-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52P26: 52+26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106: 106-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106P26: 106+26 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_242: 242-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484: 484-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484P242: 484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996: 996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484: 996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242: 996+484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996: 2x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484: 2x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996: 3x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484: 3x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_4x996: 4x996-tone RU allocation + */ +enum nl80211_eht_ru_alloc { + NL80211_RATE_INFO_EHT_RU_ALLOC_26, + NL80211_RATE_INFO_EHT_RU_ALLOC_52, + NL80211_RATE_INFO_EHT_RU_ALLOC_52P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_106, + NL80211_RATE_INFO_EHT_RU_ALLOC_106P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_242, + NL80211_RATE_INFO_EHT_RU_ALLOC_484, + NL80211_RATE_INFO_EHT_RU_ALLOC_484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_996, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_4x996, +}; + /** * enum nl80211_rate_info - bitrate information * @@ -3431,6 +3481,13 @@ enum nl80211_he_ru_alloc { * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1) * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc) + * @NL80211_RATE_INFO_320_MHZ_WIDTH: 320 MHz bitrate + * @NL80211_RATE_INFO_EHT_MCS: EHT MCS index (u8, 0-15) + * @NL80211_RATE_INFO_EHT_NSS: EHT NSS value (u8, 1-8) + * @NL80211_RATE_INFO_EHT_GI: EHT guard interval identifier + * (u8, see &enum nl80211_eht_gi) + * @NL80211_RATE_INFO_EHT_RU_ALLOC: EHT RU allocation, if not present then + * non-OFDMA was used (u8, see &enum nl80211_eht_ru_alloc) * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -3452,6 +3509,11 @@ enum nl80211_rate_info { NL80211_RATE_INFO_HE_GI, NL80211_RATE_INFO_HE_DCM, NL80211_RATE_INFO_HE_RU_ALLOC, + NL80211_RATE_INFO_320_MHZ_WIDTH, + NL80211_RATE_INFO_EHT_MCS, + NL80211_RATE_INFO_EHT_NSS, + NL80211_RATE_INFO_EHT_GI, + NL80211_RATE_INFO_EHT_RU_ALLOC, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, -- cgit v1.2.3 From c2b3d7699fb0ce66538b829af43970acc2f89060 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Mon, 14 Feb 2022 17:29:56 +0100 Subject: nl80211: add support for 320MHz channel limitation Add support to advertise drivers or regulatory limitations on 320 MHz channels to userspace. Signed-off-by: Sriram R Co-authored-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Co-authored-by: Veerendranath Jakkam Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-6-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-3-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d0ba70ea5d04..6a338dafcd07 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3997,6 +3997,8 @@ enum nl80211_wmm_rule { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4033,6 +4035,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_4MHZ, NL80211_FREQUENCY_ATTR_8MHZ, NL80211_FREQUENCY_ATTR_16MHZ, + NL80211_FREQUENCY_ATTR_NO_320MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4231,6 +4234,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed * @NL80211_RRF_NO_HE: HE operation not allowed + * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4249,6 +4253,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_80MHZ = 1<<15, NL80211_RRF_NO_160MHZ = 1<<16, NL80211_RRF_NO_HE = 1<<17, + NL80211_RRF_NO_320MHZ = 1<<18, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From 31846b657857e6a73d982604f36a34710d98902c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:57 +0100 Subject: cfg80211: add NO-EHT flag to regulatory This may be necessary in some cases, add a flag and propagate it, just like the NO-HE that already exists. Signed-off-by: Ilan Peer [split off from a combined 320/no-EHT patch] Link: https://lore.kernel.org/r/20220214173004.dbb85a7b86bb.Ifc1e2daac51c1cc5f895ccfb79faf5eaec3950ec@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6a338dafcd07..baf6433c0119 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3999,6 +3999,8 @@ enum nl80211_wmm_rule { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel * as the primary or any of the secondary channels isn't possible + * @NL80211_FREQUENCY_ATTR_NO_EHT: EHT operation is not allowed on this channel + * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4036,6 +4038,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_8MHZ, NL80211_FREQUENCY_ATTR_16MHZ, NL80211_FREQUENCY_ATTR_NO_320MHZ, + NL80211_FREQUENCY_ATTR_NO_EHT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit v1.2.3 From ea05fd3581d32a0f1098657005c7a9b763798fe8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:58 +0100 Subject: cfg80211: Support configuration of station EHT capabilities Add attributes and some code bits to support userspace passing in EHT capabilities of stations. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.ecf0b3ff9627.Icb4a5f2ec7b41d9008ac4cfc16c59baeb84793d3@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index baf6433c0119..98ed52663d6b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -2659,6 +2659,10 @@ enum nl80211_commands { * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be * used with %NL80211_CMD_START_AP request. * + * @NL80211_ATTR_EHT_CAPABILITY: EHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if %NL80211_STA_FLAG_WME is set. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3169,6 +3173,8 @@ enum nl80211_attrs { NL80211_ATTR_AP_SETTINGS_FLAGS, + NL80211_ATTR_EHT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3224,6 +3230,8 @@ enum nl80211_attrs { #define NL80211_HE_MAX_CAPABILITY_LEN 54 #define NL80211_MAX_NR_CIPHER_SUITES 5 #define NL80211_MAX_NR_AKM_SUITES 2 +#define NL80211_EHT_MIN_CAPABILITY_LEN 13 +#define NL80211_EHT_MAX_CAPABILITY_LEN 51 #define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 -- cgit v1.2.3 From 79aa0367385ceaf5351ea77ea1fb66136739ea9d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Feb 2022 18:54:38 -0500 Subject: drm/amdkfd: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays CC: Changcheng Deng Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e4268f5e482..baec5a41de3e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -673,7 +673,7 @@ struct kfd_ioctl_svm_args { __u32 op; __u32 nattr; /* Variable length array of attributes */ - struct kfd_ioctl_svm_attribute attrs[0]; + struct kfd_ioctl_svm_attribute attrs[]; }; /** -- cgit v1.2.3 From a65dbf7cded724a5ed4a5e1a718616b048ca0c34 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 23 Dec 2021 09:35:37 +0800 Subject: drm/amdgpu/gfx10: Add GC 10.3.7 Support Needed to properly initialize GC 10.3.7. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 76b580d10a52..55fa660e35f4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) } -- cgit v1.2.3 From 72113d0a7d90d950c7c9a87ab905bffb6bc5752d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:24 +0900 Subject: signal.h: add linux/signal.h and asm/signal.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/signal.h and asm/signal.h are currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/asm/signal.h In file included from : ./usr/include/asm/signal.h:103:9: error: unknown type name ‘size_t’ 103 | size_t ss_size; | ^~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entries from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h index f634822906e4..0eb69dc8e572 100644 --- a/include/uapi/asm-generic/signal.h +++ b/include/uapi/asm-generic/signal.h @@ -85,7 +85,7 @@ struct sigaction { typedef struct sigaltstack { void __user *ss_sp; int ss_flags; - size_t ss_size; + __kernel_size_t ss_size; } stack_t; #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 4a3233c1a69885aa7e71c48ff39ae11c212ac90a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:25 +0900 Subject: shmbuf.h: add asm/shmbuf.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asm/shmbuf.h is currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/asm/shmbuf.h In file included from ./usr/include/asm/shmbuf.h:6, from : ./usr/include/asm-generic/shmbuf.h:26:33: error: field ‘shm_perm’ has incomplete type 26 | struct ipc64_perm shm_perm; /* operation perms */ | ^~~~~~~~ ./usr/include/asm-generic/shmbuf.h:27:9: error: unknown type name ‘size_t’ 27 | size_t shm_segsz; /* size of segment (bytes) */ | ^~~~~~ ./usr/include/asm-generic/shmbuf.h:40:9: error: unknown type name ‘__kernel_pid_t’ 40 | __kernel_pid_t shm_cpid; /* pid of creator */ | ^~~~~~~~~~~~~~ ./usr/include/asm-generic/shmbuf.h:41:9: error: unknown type name ‘__kernel_pid_t’ 41 | __kernel_pid_t shm_lpid; /* pid of last operator */ | ^~~~~~~~~~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t and by including proper headers. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/shmbuf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/shmbuf.h b/include/uapi/asm-generic/shmbuf.h index 2bab955e0fed..2979b6dd2c56 100644 --- a/include/uapi/asm-generic/shmbuf.h +++ b/include/uapi/asm-generic/shmbuf.h @@ -3,6 +3,8 @@ #define __ASM_GENERIC_SHMBUF_H #include +#include +#include /* * The shmid64_ds structure for x86 architecture. @@ -24,7 +26,7 @@ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ + __kernel_size_t shm_segsz; /* size of segment (bytes) */ #if __BITS_PER_LONG == 64 long shm_atime; /* last attach time */ long shm_dtime; /* last detach time */ -- cgit v1.2.3 From 169adc2b6b3c5e86391921117b4ab3aaeb3c6ee1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:26 +0900 Subject: android/binder.h: add linux/android/binder(fs).h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/android/binder.h and linux/android/binderfs.h are currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/linux/android/binder.h In file included from : ./usr/include/linux/android/binder.h:291:9: error: unknown type name ‘pid_t’ 291 | pid_t sender_pid; | ^~~~~ ./usr/include/linux/android/binder.h:292:9: error: unknown type name ‘uid_t’ 292 | uid_t sender_euid; | ^~~~~ The errors can be fixed by replacing {pid,uid}_t with __kernel_{pid,uid}_t. Then, remove the no-header-test entries from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/android/binder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 3246f2c74696..11157fae8a8e 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -288,8 +288,8 @@ struct binder_transaction_data { /* General information about the transaction. */ __u32 flags; - pid_t sender_pid; - uid_t sender_euid; + __kernel_pid_t sender_pid; + __kernel_uid_t sender_euid; binder_size_t data_size; /* number of bytes of data */ binder_size_t offsets_size; /* number of bytes of offsets */ -- cgit v1.2.3 From cbf2820341297b9aed0f846aba35556e94569210 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:27 +0900 Subject: fsmap.h: add linux/fsmap.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/fsmap.h is currently excluded from the UAPI compile-test because of the error like follows: HDRTEST usr/include/linux/fsmap.h In file included from : ./usr/include/linux/fsmap.h:72:19: error: unknown type name ‘size_t’ 72 | static __inline__ size_t | ^~~~~~ The error can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/fsmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h index 91fd519a3f7d..c690d17f1d07 100644 --- a/include/uapi/linux/fsmap.h +++ b/include/uapi/linux/fsmap.h @@ -69,7 +69,7 @@ struct fsmap_head { }; /* Size of an fsmap_head with room for nr records. */ -static inline size_t +static inline __kernel_size_t fsmap_sizeof( unsigned int nr) { -- cgit v1.2.3 From 8b4bca21c2c0cb3b5adb80985830a81d4e5d7081 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:28 +0900 Subject: kexec.h: add linux/kexec.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/kexec.h is currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/linux/kexec.h In file included from : ./usr/include/linux/kexec.h:56:9: error: unknown type name ‘size_t’ 56 | size_t bufsz; | ^~~~~~ ./usr/include/linux/kexec.h:58:9: error: unknown type name ‘size_t’ 58 | size_t memsz; | ^~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/kexec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 778dc191c265..fb7e2ef60825 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -54,9 +54,9 @@ */ struct kexec_segment { const void *buf; - size_t bufsz; + __kernel_size_t bufsz; const void *mem; - size_t memsz; + __kernel_size_t memsz; }; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 2a5c0fdc70cd653741e910e92ffeb2fa7376db07 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:29 +0900 Subject: reiserfs_xattr.h: add linux/reiserfs_xattr.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/reiserfs_xattr.h is currently excluded from the UAPI compile-test because of the error like follows: HDRTEST usr/include/linux/reiserfs_xattr.h In file included from : ./usr/include/linux/reiserfs_xattr.h:22:9: error: unknown type name ‘size_t’ 22 | size_t length; | ^~~~~~ The error can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/reiserfs_xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h index 28f10842f047..503ad018ce5b 100644 --- a/include/uapi/linux/reiserfs_xattr.h +++ b/include/uapi/linux/reiserfs_xattr.h @@ -19,7 +19,7 @@ struct reiserfs_xattr_header { struct reiserfs_security_handle { const char *name; void *value; - size_t length; + __kernel_size_t length; }; #endif /* _LINUX_REISERFS_XATTR_H */ -- cgit v1.2.3 From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 18:29:52 +0100 Subject: media: omap3isp: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Cc: Arnd Bergmann Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Signed-off-by: Kees Cook Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ff..d9db7ad43890 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. @@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- cgit v1.2.3 From 874bfdfa4735cbb1b0d6e0c6157c712a312371a1 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 10 Feb 2022 14:42:09 -0500 Subject: drm/amdgpu: add gc 10.3.6 support this patch adds gc 10.3.6 support. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 55fa660e35f4..1d65c1fbc4ec 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) -- cgit v1.2.3 From 6de74d1069b821e96460d0fc2edfc35785db04fb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 9 Feb 2022 08:11:10 -0800 Subject: hv_utils: Add comment about max VMbus packet size in VSS driver The VSS driver allocates a VMbus receive buffer significantly larger than sizeof(hv_vss_msg), with no explanation. To help prevent future mistakes, add a #define and comment about why this is done. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1644423070-75125-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index daf82a230c0e..aaa502a7bff4 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -90,6 +90,17 @@ struct hv_vss_check_dm_info { __u32 flags; } __attribute__((packed)); +/* + * struct hv_vss_msg encodes the fields that the Linux VSS + * driver accesses. However, FREEZE messages from Hyper-V contain + * additional LUN information that Linux doesn't use and are not + * represented in struct hv_vss_msg. A received FREEZE message may + * be as large as 6,260 bytes, so the driver must allocate at least + * that much space, not sizeof(struct hv_vss_msg). Other messages + * such as AUTO_RECOVER may be as large as 12,500 bytes. However, + * because the Linux VSS driver responds that it doesn't support + * auto-recovery, it should not receive such messages. + */ struct hv_vss_msg { union { struct hv_vss_hdr vss_hdr; -- cgit v1.2.3 From 47f0bd5032106469827cf56c8b45bb9101112105 Mon Sep 17 00:00:00 2001 From: Jacques de Laval Date: Thu, 17 Feb 2022 16:02:02 +0100 Subject: net: Add new protocol attribute to IP addresses This patch adds a new protocol attribute to IPv4 and IPv6 addresses. Inspiration was taken from the protocol attribute of routes. User space applications like iproute2 can set/get the protocol with the Netlink API. The attribute is stored as an 8-bit unsigned integer. The protocol attribute is set by kernel for these categories: - IPv4 and IPv6 loopback addresses - IPv6 addresses generated from router announcements - IPv6 link local addresses User space may pass custom protocols, not defined by the kernel. Grouping addresses on their origin is useful in scenarios where you want to distinguish between addresses based on who added them, e.g. kernel vs. user space. Tagging addresses with a string label is an existing feature that could be used as a solution. Unfortunately the max length of a label is 15 characters, and for compatibility reasons the label must be prefixed with the name of the device followed by a colon. Since device names also have a max length of 15 characters, only -1 characters is guaranteed to be available for any origin tag, which is not that much. A reference implementation of user space setting and getting protocols is available for iproute2: https://github.com/westermo/iproute2/commit/9a6ea18bd79f47f293e5edc7780f315ea42ff540 Signed-off-by: Jacques de Laval Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220217150202.80802-1-Jacques.De.Laval@westermo.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_addr.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index dfcf3ce0097f..1c392dd95a5e 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -33,8 +33,9 @@ enum { IFA_CACHEINFO, IFA_MULTICAST, IFA_FLAGS, - IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ __IFA_MAX, }; @@ -69,4 +70,10 @@ struct ifa_cacheinfo { #define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) #endif +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + #endif -- cgit v1.2.3 From 4b340a5a726dafba15b366c4009aa0a8f77631ac Mon Sep 17 00:00:00 2001 From: Mobashshera Rasool Date: Thu, 17 Feb 2022 07:46:40 +0000 Subject: net: ip6mr: add support for passing full packet on wrong mif This patch adds support for MRT6MSG_WRMIFWHOLE which is used to pass full packet and real vif id when the incoming interface is wrong. While the RP and FHR are setting up state we need to be sending the registers encapsulated with all the data inside otherwise we lose it. The RP then decapsulates it and forwards it to the interested parties. Currently with WRONGMIF we can only be sending empty register packets and will lose that data. This behaviour can be enabled by using MRT_PIM with val == MRT6MSG_WRMIFWHOLE. This doesn't prevent MRT6MSG_WRONGMIF from happening, it happens in addition to it, also it is controlled by the same throttling parameters as WRONGMIF (i.e. 1 packet per 3 seconds currently). Both messages are generated to keep backwards compatibily and avoid breaking someone who was enabling MRT_PIM with val == 4, since any positive val is accepted and treated the same. Signed-off-by: Mobashshera Rasool Signed-off-by: David S. Miller --- include/uapi/linux/mroute6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index a1fd6173e2db..1d90c21a6251 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -134,6 +134,7 @@ struct mrt6msg { #define MRT6MSG_NOCACHE 1 #define MRT6MSG_WRONGMIF 2 #define MRT6MSG_WHOLEPKT 3 /* used for use level encap */ +#define MRT6MSG_WRMIFWHOLE 4 /* For PIM Register and assert processing */ __u8 im6_mbz; /* must be zero */ __u8 im6_msgtype; /* what type of message */ __u16 im6_mif; /* mif rec'd on */ -- cgit v1.2.3 From caa574ffc4aaf4f29b890223878c63e2e7772f62 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Sat, 19 Feb 2022 00:17:49 +0530 Subject: drm/i915/uapi: document behaviour for DG2 64K support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On discrete platforms like DG2, we need to support a minimum page size of 64K when dealing with device local-memory. This is quite tricky for various reasons, so try to document the new implicit uapi for this. v4: Kdoc modification. v3: fix typos and less emphasis v2: Fixed suggestions on formatting [Daniel] Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Acked-by: Jordan Justen Reviewed-by: Ramalingam C Reviewed-by: Thomas Hellström cc: Simon Ser cc: Pekka Paalanen Cc: Jordan Justen Cc: Kenneth Graunke Cc: mesa-dev@lists.freedesktop.org Cc: Tony Ye Cc: Slawomir Milczarek Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-13-ramalingam.c@intel.com --- include/uapi/drm/i915_drm.h | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 914ebd9290e5..05c3642aaece 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1118,10 +1118,16 @@ struct drm_i915_gem_exec_object2 { /** * When the EXEC_OBJECT_PINNED flag is specified this is populated by * the user with the GTT offset at which this object will be pinned. + * * When the I915_EXEC_NO_RELOC flag is specified this must contain the * presumed_offset of the object. + * * During execbuffer2 the kernel populates it with the value of the * current GTT offset of the object, for future presumed_offset writes. + * + * See struct drm_i915_gem_create_ext for the rules when dealing with + * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with + * minimum page sizes, like DG2. */ __u64 offset; @@ -3144,11 +3150,40 @@ struct drm_i915_gem_create_ext { * * The (page-aligned) allocated size for the object will be returned. * - * Note that for some devices we have might have further minimum - * page-size restrictions(larger than 4K), like for device local-memory. - * However in general the final size here should always reflect any - * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS - * extension to place the object in device local-memory. + * + * DG2 64K min page size implications: + * + * On discrete platforms, starting from DG2, we have to contend with GTT + * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE + * objects. Specifically the hardware only supports 64K or larger GTT + * page sizes for such memory. The kernel will already ensure that all + * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page + * sizes underneath. + * + * Note that the returned size here will always reflect any required + * rounding up done by the kernel, i.e 4K will now become 64K on devices + * such as DG2. + * + * Special DG2 GTT address alignment requirement: + * + * The GTT alignment will also need to be at least 2M for such objects. + * + * Note that due to how the hardware implements 64K GTT page support, we + * have some further complications: + * + * 1) The entire PDE (which covers a 2MB virtual address range), must + * contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same + * PDE is forbidden by the hardware. + * + * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM + * objects. + * + * To keep things simple for userland, we mandate that any GTT mappings + * must be aligned to and rounded up to 2MB. The kernel will internally + * pad them out to the next 2MB boundary. As this only wastes virtual + * address space and avoids userland having to copy any needlessly + * complicated PDE sharing scheme (coloring) and only affects DG2, this + * is deemed to be a good compromise. */ __u64 size; /** -- cgit v1.2.3 From a1dc6308865df719efb2a2f8a5f0f5979602d267 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 21 Jan 2022 16:08:16 +1030 Subject: fsi: sbefifo: Implement FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl sets the read timeout (in seconds) for the response received by sbefifo device from sbe. The timeout affects only the read operation on current sbefifo device fd. Certain SBE operations can take long time to complete and the default timeout of 10 seconds might not be sufficient to start receiving response from SBE. In such cases, allow the timeout to be set to the maximum of 120 seconds. The kernel does not contain the definition of the various SBE operations, so we must expose an interface to userspace to set the timeout for the given operation. Signed-off-by: Amitay Isaacs Signed-off-by: Joel Stanley Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20220121053816.82253-3-joel@jms.id.au Signed-off-by: Joel Stanley --- include/uapi/linux/fsi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fsi.h b/include/uapi/linux/fsi.h index da577ecd90e7..b2f1977378c7 100644 --- a/include/uapi/linux/fsi.h +++ b/include/uapi/linux/fsi.h @@ -55,4 +55,18 @@ struct scom_access { #define FSI_SCOM_WRITE _IOWR('s', 0x02, struct scom_access) #define FSI_SCOM_RESET _IOW('s', 0x03, __u32) +/* + * /dev/sbefifo* ioctl interface + */ + +/** + * FSI_SBEFIFO_READ_TIMEOUT sets the read timeout for response from SBE. + * + * The read timeout is specified in seconds. The minimum value of read + * timeout is 10 seconds (default) and the maximum value of read timeout is + * 120 seconds. A read timeout of 0 will reset the value to the default of + * (10 seconds). + */ +#define FSI_SBEFIFO_READ_TIMEOUT_SECONDS _IOW('s', 0x00, __u32) + #endif /* _UAPI_LINUX_FSI_H */ -- cgit v1.2.3 From 129e3c1bab24d27d0fa6e505a472345a92d7a2b0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Feb 2022 13:54:57 +0800 Subject: bonding: add new option ns_ip6_target This patch add a new bonding option ns_ip6_target, which correspond to the arp_ip_target. With this we set IPv6 targets and send IPv6 NS request to determine the health of the link. For other related options like the validation, we still use arp_validate, and will change to ns_validate later. Note: the sysfs configuration support was removed based on https://lore.kernel.org/netdev/8863.1645071997@famine Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6218f93f5c1a..e1ba2d51b717 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -860,6 +860,7 @@ enum { IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From c086df4902573e2f06c6a2a83452c13a8bc603f5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Jan 2022 18:52:52 -0500 Subject: fuse: move FUSE_SUPER_MAGIC definition to magic.h ...to help userland apps that need to identify FUSE mounts. Signed-off-by: Jeff Layton Signed-off-by: Miklos Szeredi --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 0425cd79af9a..f724129c0425 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -36,6 +36,7 @@ #define EFIVARFS_MAGIC 0xde5e81e4 #define HOSTFS_SUPER_MAGIC 0x00c0ffee #define OVERLAYFS_SUPER_MAGIC 0x794c7630 +#define FUSE_SUPER_MAGIC 0x65735546 #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ #define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */ -- cgit v1.2.3 From d43583b890e7cb0078d13d056753a56602b92406 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Feb 2022 15:35:23 +0000 Subject: KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest PSCI v1.1 introduces the optional SYSTEM_RESET2 call, which allows the caller to provide a vendor-specific "reset type" and "cookie" to request a particular form of reset or shutdown. Expose this call to the guest and handle it in the same way as PSCI SYSTEM_RESET, along with some basic range checking on the type argument. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220221153524.15397-3-will@kernel.org --- include/uapi/linux/psci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 2fcad1dd0b0e..2bf93c0d6354 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -82,6 +82,10 @@ #define PSCI_0_2_TOS_UP_NO_MIGRATE 1 #define PSCI_0_2_TOS_MP 2 +/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */ +#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 +#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ -- cgit v1.2.3 From a603ca60cebff8589882427a67f870ed946b3fc8 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Thu, 10 Feb 2022 16:42:03 -0800 Subject: serial: 8250_aspeed_vuart: add PORT_ASPEED_VUART port type Commit 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping") fixed a bug that had, as a side-effect, prevented the 8250_aspeed_vuart driver from enabling the VUART's FIFOs. However, fixing that (and hence enabling the FIFOs) has in turn revealed what appears to be a hardware bug in the ASPEED VUART in which the host-side THRE bit doesn't get if the BMC-side receive FIFO trigger level is set to anything but one byte. This causes problems for polled-mode writes from the host -- for example, Linux kernel console writes proceed at a glacial pace (less than 100 bytes per second) because the write path waits for a 10ms timeout to expire after every character instead of being able to continue on to the next character upon seeing THRE asserted. (GRUB behaves similarly.) As a workaround, introduce a new port type for the ASPEED VUART that's identical to PORT_16550A as it had previously been using, but with UART_FCR_R_TRIG_00 instead to set the receive FIFO trigger level to one byte, which (experimentally) seems to avoid the problematic THRE behavior. Fixes: 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping") Tested-by: Konstantin Aladyshev Reviewed-by: Andy Shevchenko Signed-off-by: Zev Weiss Link: https://lore.kernel.org/r/20220211004203.14915-1-zev@bewilderbeest.net Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index c4042dcfdc0c..8885e69178bd 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -68,6 +68,9 @@ /* NVIDIA Tegra Combined UART */ #define PORT_TEGRA_TCU 41 +/* ASPEED AST2x00 virtual UART */ +#define PORT_ASPEED_VUART 42 + /* Intel EG20 */ #define PORT_PCH_8LINE 44 #define PORT_PCH_2LINE 45 -- cgit v1.2.3 From a1a5cfe70cd29a59a9a85290dfe95ed1c8df1193 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Mon, 14 Feb 2022 09:38:06 +0200 Subject: iio: introduce mag_referenced Some accelerometers that support activity and inactivity events also support a referenced mode, in which the gravitational acceleration is taken as a point of reference before comparing the acceleration to the specified activity and inactivity magnitude. For example, in the case of the ADXL367, for activity detection, the formula is: abs(acceleration - reference) > magnitude Add a new event type that makes this behavior clear. Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220214073810.781016-2-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 48c13147c0a8..472cead10d8d 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -104,6 +104,7 @@ enum iio_event_type { IIO_EV_TYPE_THRESH_ADAPTIVE, IIO_EV_TYPE_MAG_ADAPTIVE, IIO_EV_TYPE_CHANGE, + IIO_EV_TYPE_MAG_REFERENCED, }; enum iio_event_direction { -- cgit v1.2.3 From 0fbb4d93b38bce1f8235aacfa37e90ad8f011473 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:32 -0500 Subject: dm: add dm_submit_bio_remap interface Where possible, switch from early bio-based IO accounting (at the time DM clones each incoming bio) to late IO accounting just before each remapped bio is issued to underlying device via submit_bio_noacct(). Allows more precise bio-based IO accounting for DM targets that use their own workqueues to perform additional processing of each bio in conjunction with their DM_MAPIO_SUBMITTED return from their map function. When a target is updated to use dm_submit_bio_remap() they must also set ti->accounts_remapped_io to true. Use xchg() in start_io_acct(), as suggested by Mikulas, to ensure each IO is only started once. The xchg race only happens if __send_duplicate_bios() sends multiple bios -- that case is reflected via tio->is_duplicate_bio. Given the niche nature of this race, it is best to avoid any xchg performance penalty for normal IO. For IO that was never submitted with dm_bio_submit_remap(), but the target completes the clone with bio_endio, accounting is started then ended and pending_io counter decremented. Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index c12ce30b52df..2e9550fef90f 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 45 +#define DM_VERSION_MINOR 46 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2021-03-22)" +#define DM_VERSION_EXTRA "-ioctl (2022-02-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 93b71801a8274cd9511557faf04365a5de487197 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 09:06:54 -0500 Subject: KVM: PPC: reserve capability 210 for KVM_CAP_PPC_AIL_MODE_3 Add KVM_CAP_PPC_AIL_MODE_3 to advertise the capability to set the AIL resource mode to 3 with the H_SET_MODE hypercall. This capability differs between processor types and KVM types (PR, HV, Nested HV), and affects guest-visible behaviour. QEMU will implement a cap-ail-mode-3 to control this behaviour[1], and use the KVM CAP if available to determine KVM support[2]. Reviewed-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From a21d9a670d81103db7f788de1a4a4a6e4b891a0b Mon Sep 17 00:00:00 2001 From: Hans Schultz Date: Wed, 23 Feb 2022 11:16:46 +0100 Subject: net: bridge: Add support for bridge port in locked mode In a 802.1X scenario, clients connected to a bridge port shall not be allowed to have traffic forwarded until fully authenticated. A static fdb entry of the clients MAC address for the bridge port unlocks the client and allows bidirectional communication. This scenario is facilitated with setting the bridge port in locked mode, which is also supported by various switchcore chipsets. Signed-off-by: Hans Schultz Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e1ba2d51b717..be09d2ad4b5d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -537,6 +537,7 @@ enum { IFLA_BRPORT_MRP_IN_OPEN, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From 1241e329ce2e1f5b1039fd356b75867b29721ad2 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 23 Feb 2022 00:09:12 +0530 Subject: ethtool: add support to set/get completion queue event size Add support to set completion queue event size via ethtool -G parameter and get it via ethtool -g parameter. ~ # ./ethtool -G eth0 cqe-size 512 ~ # ./ethtool -g eth0 Ring parameters for eth0: Pre-set maximums: RX: 1048576 RX Mini: n/a RX Jumbo: n/a TX: 1048576 Current hardware settings: RX: 256 RX Mini: n/a RX Jumbo: n/a TX: 4096 RX Buf Len: 2048 CQE Size: 128 Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 417d4280d7b5..979850221b8d 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -337,6 +337,7 @@ enum { ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From 28a3f0601727d521a1c6cce62ecbcb7402a9e4f5 Mon Sep 17 00:00:00 2001 From: Toms Atteka Date: Wed, 23 Feb 2022 16:54:09 -0800 Subject: net: openvswitch: IPv6: Add IPv6 extension header support This change adds a new OpenFlow field OFPXMT_OFB_IPV6_EXTHDR and packets can be filtered using ipv6_ext flag. Signed-off-by: Toms Atteka Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 150bcff49b1c..9d1710f20505 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,6 +351,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -430,6 +431,11 @@ struct ovs_key_ipv6 { __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ }; +/* separate structure to support backward compatibility with older user space */ +struct ovs_key_ipv6_exthdrs { + __u16 hdrs; +}; + struct ovs_key_tcp { __be16 tcp_src; __be16 tcp_dst; -- cgit v1.2.3 From ba7bb663f5547ef474c98df99a97bb4a13c5715f Mon Sep 17 00:00:00 2001 From: David Dunn Date: Wed, 23 Feb 2022 22:57:41 +0000 Subject: KVM: x86: Provide per VM capability for disabling PMU virtualization Add a new capability, KVM_CAP_PMU_CAPABILITY, that takes a bitmask of settings/features to allow userspace to configure PMU virtualization on a per-VM basis. For now, support a single flag, KVM_PMU_CAP_DISABLE, to allow disabling PMU virtualization for a VM even when KVM is configured with enable_pmu=true a module level. To keep KVM simple, disallow changing VM's PMU configuration after vCPUs have been created. Signed-off-by: David Dunn Message-Id: <20220223225743.2703915-2-daviddunn@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a02bbf8fd0f6..d2f1efc3aa35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1142,6 +1142,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 #ifdef KVM_CAP_IRQ_ROUTING @@ -1978,6 +1979,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. -- cgit v1.2.3 From 43245eca6e670ebf65908b549641c1460a9cc944 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 2 Feb 2022 17:55:02 -0500 Subject: NFSv4.1 support for NFS4_RESULT_PRESERVER_UNLINKED In 4.1+, the server is allowed to set a flag NFS4_RESULT_PRESERVE_UNLINKED in reply to the OPEN, that tells the client that it does not need to do a silly rename of an opened file when it's being removed. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 800bb0ffa6e6..1d2043708bf1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -45,6 +45,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F -- cgit v1.2.3 From 9e8d5470325f25bed7d33f9faaae6d5e4f313650 Mon Sep 17 00:00:00 2001 From: Hammer Hsieh Date: Tue, 22 Feb 2022 17:36:03 +0800 Subject: serial: sunplus-uart: Add Sunplus SoC UART Driver Add Sunplus SoC UART Driver. SP7021 UART block contains 5 UARTs. There are UART0~4 that supported in SP7021, the features list as below. Support Full-duplex communication. Support data packet length configurable. Support stop bit number configurable. Support force break condition. Support baud rate configurable. Support error detection and report. Support RXD Noise Rejection Vote configurable. UART0 pinout only support TX/RX two pins. UART1 to UART4 pinout support TX/RX/CTS/RTS four pins. Normally UART0 used for kernel console, also can be used for normal uart. Command line set "console=ttySUP0,115200", SUP means Sunplus Uart Port. UART driver probe will create path named "/dev/ttySUPx". https://sunplus.atlassian.net/wiki/spaces/doc/pages/1873412290/13.+Universal+Asynchronous+Receiver+Transmitter+UART Signed-off-by: Hammer Hsieh Link: https://lore.kernel.org/r/1645522563-17183-3-git-send-email-hammerh0314@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 8885e69178bd..6faf502b7860 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -277,4 +277,7 @@ /* Freescale LINFlexD UART */ #define PORT_LINFLEXUART 122 +/* Sunplus UART */ +#define PORT_SUNPLUS 123 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From c2faf737abfb10f88f2d2612d573e9edc3c42c37 Mon Sep 17 00:00:00 2001 From: Max Staudt Date: Fri, 11 Feb 2022 15:10:36 +0100 Subject: tty: Reserve ldisc 29 for development purposes It's handy to have an ldisc number free for out-of-tree testing. This way, a new ldisc can be developed on any running system, without having to recompile the kernel just to define a new number. This is the highest number (and also the last one) available under the old numbering scheme, so let's reserve it before it's too late. From now on, every new ldisc upstreamed will have to increment NR_LDISCS in lockstep with its addition to the table in tty.h. Signed-off-by: Max Staudt Link: https://lore.kernel.org/r/20220211141036.6403-1-max@enpas.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/tty.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index a58deb3061eb..9d0f06bfbac3 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -6,8 +6,6 @@ * 'tty.h' defines some structures used by tty_io.c and some defines. */ -#define NR_LDISCS 30 - /* line disciplines */ #define N_TTY 0 #define N_SLIP 1 @@ -39,5 +37,9 @@ #define N_SPEAKUP 26 /* Speakup communication with synths */ #define N_NULL 27 /* Null ldisc used for error handling */ #define N_MCTP 28 /* MCTP-over-serial */ +#define N_DEVELOPMENT 29 /* Manual out-of-tree testing */ + +/* Always the newest line discipline + 1 */ +#define NR_LDISCS 30 #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From fad278388e01e3658a356118bed8ee2c2408d280 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 May 2021 21:15:15 -0700 Subject: media: omap3isp: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Arnd Bergmann Cc: Sakari Ailus Cc: linux-media@vger.kernel.org Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Signed-off-by: Kees Cook --- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ff..d9db7ad43890 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. @@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- cgit v1.2.3 From 89377bc1975c2993bde4a498a3a4e5817ac0ae2c Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Thu, 10 Feb 2022 11:07:55 +0530 Subject: nvme: add vectored-io support for user-passthrough Add a new NVME_IOCTL_IO64_CMD_VEC ioctl that works like the existing NVME_IOCTL_IO64_CMD ioctl except that it takes and array of iovecs and thus supports vectored I/O. - cmd.addr is base address of user iovec array - cmd.vec_cnt is count of iovec array elements This patch does not include vectored-variant for admin-commands as most of them are light on buffers and likely to have low invocation frequency. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/uapi/linux/nvme_ioctl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index d99b5a772698..b2e43185e3b5 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 { __u64 metadata; __u64 addr; __u32 metadata_len; - __u32 data_len; + union { + __u32 data_len; /* for non-vectored io */ + __u32 vec_cnt; /* for vectored io */ + }; __u32 cdw10; __u32 cdw11; __u32 cdw12; @@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 { #define NVME_IOCTL_RESCAN _IO('N', 0x46) #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From aa3766def7506e5d9bd6c8387dcfe3629eb2a1f2 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 18 Jan 2022 14:58:30 +0200 Subject: habanalabs: expose number of user interrupts Currently we only expose to the user the ID of the first available user interrupt. To make user interrupts allocation truly dynamic, we need to also expose the number of user interrupts. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 371dfc4243b3..12976f7a8d84 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -404,6 +404,8 @@ enum hl_server_type { * @cpucp_version: The CPUCP f/w version. * @card_name: The card name as passed by the f/w. * @dram_page_size: The DRAM physical page size. + * @number_of_user_interrupts: The number of interrupts that are available to the userspace + * application to use. Relevant for Gaudi2 and later. */ struct hl_info_hw_ip_info { __u64 sram_base_address; @@ -428,6 +430,9 @@ struct hl_info_hw_ip_info { __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; __u64 reserved2; __u64 dram_page_size; + __u32 reserved3; + __u16 number_of_user_interrupts; + __u16 pad2; }; struct hl_info_dram_usage { -- cgit v1.2.3 From 9158bf69e74f98fea6847cca93bbf33a589bebcd Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Thu, 23 Dec 2021 13:24:34 +0200 Subject: habanalabs: Timestamps buffers registration Timestamp registration API allows the user to register a timestamp record event which will make the driver set timestamp when CQ counter reaches the target value and write it to a specific location specified by the user. This is a non blocking API, unlike the wait_for_interrupt which is a blocking one. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 12976f7a8d84..e21db03196ae 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note * - * Copyright 2016-2020 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -30,6 +30,9 @@ */ #define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 +/* Max number of elements in timestamps registration buffers */ +#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1MB */ + /* * Goya queue Numbering * @@ -695,10 +698,12 @@ struct hl_cb_in { __u64 cb_handle; /* HL_CB_OP_* */ __u32 op; + /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that * will be allocated, regardless of this parameter's value, is PAGE_SIZE */ __u32 cb_size; + /* Context ID - Currently not in use */ __u32 ctx_id; /* HL_CB_FLAGS_* */ @@ -964,6 +969,7 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 #define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 +#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 @@ -1036,6 +1042,20 @@ struct hl_wait_cs_in { * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set */ __u64 cq_counters_offset; + + /* + * Timestamp_handle timestamps buffer handle. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_handle; + + /* + * Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above. + * upon interrupt, if the cq reached the target value then driver will write + * timestamp to this offset. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_offset; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -1082,6 +1102,14 @@ union hl_wait_cs_args { */ #define HL_MEM_OP_EXPORT_DMABUF_FD 5 +/* Opcode to create timestamps pool for user interrupts registration support + * The memory will be allocated by the kernel driver, A timestamp buffer which the user + * will get handle to it for mmap, and another internal buffer used by the + * driver for registration management + * The memory will be freed when the user closes the file descriptor(ctx close) + */ +#define HL_MEM_OP_TS_ALLOC 6 + /* Memory flags */ #define HL_MEM_CONTIGUOUS 0x1 #define HL_MEM_SHARED 0x2 @@ -1173,9 +1201,14 @@ struct hl_mem_in { * DMA-BUF file/FD flags. */ __u32 flags; + /* Context ID - Currently not in use */ __u32 ctx_id; - __u32 pad; + + /* number of timestamp elements + * used only when HL_MEM_OP_TS_ALLOC opcode + */ + __u32 num_of_elements; }; struct hl_mem_out { -- cgit v1.2.3 From 9349a321d327d08a4e91f9b6aca9c1f3a3a25306 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Feb 2022 15:20:08 +0200 Subject: habanalabs: use kernel-doc for memory ioctl documentation Re-format the comments for the memory ioctl structure to be in kernel-doc style. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 111 +++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 54 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index e21db03196ae..a2a953a91b5e 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1116,98 +1116,101 @@ union hl_wait_cs_args { #define HL_MEM_USERPTR 0x4 #define HL_MEM_FORCE_HINT 0x8 +/** + * structure hl_mem_in - structure that handle input args for memory IOCTL + * @union arg: union of structures to be used based on the input operation + * @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions). + * @flags: flags for the memory operation (one of the HL_MEM_* definitions). + * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags. + * @ctx_id: context ID - currently not in use. + * @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode. + */ struct hl_mem_in { union { - /* HL_MEM_OP_ALLOC- allocate device memory */ + /** + * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) + * @mem_size: memory size to allocate + */ struct { - /* Size to alloc */ __u64 mem_size; } alloc; - /* HL_MEM_OP_FREE - free device memory */ + /** + * structure for free-ing device memory (used with the HL_MEM_OP_FREE op) + * @handle: handle returned from HL_MEM_OP_ALLOC + */ struct { - /* Handle returned from HL_MEM_OP_ALLOC */ __u64 handle; } free; - /* HL_MEM_OP_MAP - map device memory */ + /** + * structure for mapping device memory (used with the HL_MEM_OP_MAP op) + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @handle: handle returned from HL_MEM_OP_ALLOC. + */ struct { - /* - * Requested virtual address of mapped memory. - * The driver will try to map the requested region to - * this hint address, as long as the address is valid - * and not already mapped. The user should check the - * returned address of the IOCTL to make sure he got - * the hint address. Passing 0 here means that the - * driver will choose the address itself. - */ __u64 hint_addr; - /* Handle returned from HL_MEM_OP_ALLOC */ __u64 handle; } map_device; - /* HL_MEM_OP_MAP - map host memory */ + /** + * structure for mapping host memory (used with the HL_MEM_OP_MAP op) + * @host_virt_addr: address of allocated host memory. + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @size: size of allocated host memory. + */ struct { - /* Address of allocated host memory */ __u64 host_virt_addr; - /* - * Requested virtual address of mapped memory. - * The driver will try to map the requested region to - * this hint address, as long as the address is valid - * and not already mapped. The user should check the - * returned address of the IOCTL to make sure he got - * the hint address. Passing 0 here means that the - * driver will choose the address itself. - */ __u64 hint_addr; - /* Size of allocated host memory */ __u64 mem_size; } map_host; - /* HL_MEM_OP_MAP_BLOCK - map a hw block */ + /** + * structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op) + * @block_addr:HW block address to map, a handle and size will be returned + * to the user and will be used to mmap the relevant block. + * only addresses from configuration space are allowed. + */ struct { - /* - * HW block address to map, a handle and size will be - * returned to the user and will be used to mmap the - * relevant block. Only addresses from configuration - * space are allowed. - */ __u64 block_addr; } map_block; - /* HL_MEM_OP_UNMAP - unmap host memory */ + /** + * structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op) + * @device_virt_addr: virtual address returned from HL_MEM_OP_MAP + */ struct { - /* Virtual address returned from HL_MEM_OP_MAP */ __u64 device_virt_addr; } unmap; - /* HL_MEM_OP_EXPORT_DMABUF_FD */ + /** + * structure for exporting DMABUF object (used with + * the HL_MEM_OP_EXPORT_DMABUF_FD op) + * @handle: handle returned from HL_MEM_OP_ALLOC. + * in Gaudi, where we don't have MMU for the device memory, the + * driver expects a physical address (instead of a handle) in the + * device memory space. + * @mem_size: size of memory allocation. Relevant only for GAUDI + */ struct { - /* Handle returned from HL_MEM_OP_ALLOC. In Gaudi, - * where we don't have MMU for the device memory, the - * driver expects a physical address (instead of - * a handle) in the device memory space. - */ __u64 handle; - /* Size of memory allocation. Relevant only for GAUDI */ __u64 mem_size; } export_dmabuf_fd; }; - /* HL_MEM_OP_* */ __u32 op; - /* HL_MEM_* flags. - * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the - * DMA-BUF file/FD flags. - */ __u32 flags; - - /* Context ID - Currently not in use */ __u32 ctx_id; - - /* number of timestamp elements - * used only when HL_MEM_OP_TS_ALLOC opcode - */ __u32 num_of_elements; }; -- cgit v1.2.3 From f23f280277d5a701de99c6652623b6bf8801c534 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Feb 2022 15:22:00 +0200 Subject: habanalabs: allow user to set allocation page size In future ASICs the MMU will be able to work with multiple page sizes, thus a new flag is added to allow the user to set the requested page size. This flag is added since the whole DRAM is allocated for the user and the user also should be familiar with the memory usage use case. As such, the user may choose to "over allocate" memory in favor of performance (for instance- large page allocations covers more memory in less TLB entries). For example: say available page sizes are of 1MB and 32MB. If user wants to allocate 40MB the user can either set page size to 1MB and allocate the exact amount of memory (but will result in 40 TLB entries) or the user can use 32MB pages, "waste" 8MB of physical memory but occupy only 2 TLB entries. Note that this feature will be available only to ASIC that supports multiple DRAM page sizes. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a2a953a91b5e..1d6b4f0c4159 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note * - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -1130,9 +1130,12 @@ struct hl_mem_in { /** * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) * @mem_size: memory size to allocate + * @page_size: page size to use on allocation. when the value is 0 the default page + * size will be taken. */ struct { __u64 mem_size; + __u64 page_size; } alloc; /** -- cgit v1.2.3 From 0c9f178778919bb500443f340647b44227778fb2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:42 +0800 Subject: iommu: Remove guest pasid related interfaces and definitions The guest pasid related uapi interfaces and definitions are not referenced anywhere in the tree. We've also reached a consensus to replace them with a new iommufd design. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 181 --------------------------------------------- 1 file changed, 181 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 59178fc229ca..65d8b0234f69 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -158,185 +158,4 @@ struct iommu_page_response { __u32 code; }; -/* defines the granularity of the invalidation */ -enum iommu_inv_granularity { - IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ - IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ - IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ - IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ -}; - -/** - * struct iommu_inv_addr_info - Address Selective Invalidation Structure - * - * @flags: indicates the granularity of the address-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If ARCHID bit is set, @archid is populated and the invalidation relates - * to cache entries tagged with this architecture specific ID and matching - * the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - If neither PASID or ARCHID is set, global addr invalidation applies. - * - The LEAF flag indicates whether only the leaf PTE caching needs to be - * invalidated and other paging structure caches can be preserved. - * @pasid: process address space ID - * @archid: architecture-specific ID - * @addr: first stage/level input address - * @granule_size: page/block size of the mapping in bytes - * @nb_granules: number of contiguous granules to be invalidated - */ -struct iommu_inv_addr_info { -#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) -#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) -#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) - __u32 flags; - __u32 archid; - __u64 pasid; - __u64 addr; - __u64 granule_size; - __u64 nb_granules; -}; - -/** - * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure - * - * @flags: indicates the granularity of the PASID-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If the ARCHID bit is set, the @archid is populated and the invalidation - * relates to cache entries tagged with this architecture specific ID and - * matching the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - At least one of PASID or ARCHID must be set. - * @pasid: process address space ID - * @archid: architecture-specific ID - */ -struct iommu_inv_pasid_info { -#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) -#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) - __u32 flags; - __u32 archid; - __u64 pasid; -}; - -/** - * struct iommu_cache_invalidate_info - First level/stage invalidation - * information - * @argsz: User filled size of this data - * @version: API version of this structure - * @cache: bitfield that allows to select which caches to invalidate - * @granularity: defines the lowest granularity used for the invalidation: - * domain > PASID > addr - * @padding: reserved for future use (should be zero) - * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID - * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR - * - * Not all the combinations of cache/granularity are valid: - * - * +--------------+---------------+---------------+---------------+ - * | type / | DEV_IOTLB | IOTLB | PASID | - * | granularity | | | cache | - * +==============+===============+===============+===============+ - * | DOMAIN | N/A | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | PASID | Y | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | ADDR | Y | Y | N/A | - * +--------------+---------------+---------------+---------------+ - * - * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than - * @version and @cache. - * - * If multiple cache types are invalidated simultaneously, they all - * must support the used granularity. - */ -struct iommu_cache_invalidate_info { - __u32 argsz; -#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; -/* IOMMU paging structure cache */ -#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ -#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ -#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ -#define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; -}; - -/** - * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest - * SVA binding. - * - * @flags: VT-d PASID table entry attributes - * @pat: Page attribute table data to compute effective memory type - * @emt: Extended memory type - * - * Only guest vIOMMU selectable and effective options are passed down to - * the host IOMMU. - */ -struct iommu_gpasid_bind_data_vtd { -#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ -#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ -#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ -#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ -#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_WPE (1 << 6) /* Write protect enable */ -#define IOMMU_SVA_VTD_GPASID_LAST (1 << 7) - __u64 flags; - __u32 pat; - __u32 emt; -}; - -#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ - IOMMU_SVA_VTD_GPASID_EMTE | \ - IOMMU_SVA_VTD_GPASID_PCD | \ - IOMMU_SVA_VTD_GPASID_PWT) - -/** - * struct iommu_gpasid_bind_data - Information about device and guest PASID binding - * @argsz: User filled size of this data - * @version: Version of this data structure - * @format: PASID table entry format - * @flags: Additional information on guest bind request - * @gpgd: Guest page directory base of the guest mm to bind - * @hpasid: Process address space ID used for the guest mm in host IOMMU - * @gpasid: Process address space ID used for the guest mm in guest IOMMU - * @addr_width: Guest virtual address width - * @padding: Reserved for future use (should be zero) - * @vtd: Intel VT-d specific data - * - * Guest to host PASID mapping can be an identity or non-identity, where guest - * has its own PASID space. For non-identify mapping, guest to host PASID lookup - * is needed when VM programs guest PASID into an assigned device. VMM may - * trap such PASID programming then request host IOMMU driver to convert guest - * PASID to host PASID based on this bind data. - */ -struct iommu_gpasid_bind_data { - __u32 argsz; -#define IOMMU_GPASID_BIND_VERSION_1 1 - __u32 version; -#define IOMMU_PASID_FORMAT_INTEL_VTD 1 -#define IOMMU_PASID_FORMAT_LAST 2 - __u32 format; - __u32 addr_width; -#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ - __u64 flags; - __u64 gpgd; - __u64 hpasid; - __u64 gpasid; - __u8 padding[8]; - /* Vendor specific data */ - union { - struct iommu_gpasid_bind_data_vtd vtd; - } vendor; -}; - #endif /* _UAPI_IOMMU_H */ -- cgit v1.2.3 From 7b8135f4df98b155b23754b6065c157861e268f1 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Mar 2022 05:04:34 +0000 Subject: rtnetlink: add new rtm tunnel api for tunnel id filtering This patch adds new rtm tunnel msg and api for tunnel id filtering in dst_metadata devices. First dst_metadata device to use the api is vxlan driver with AF_BRIDGE family. This and later changes add ability in vxlan driver to do tunnel id filtering (or vni filtering) on dst_metadata devices. This is similar to vlan api in the vlan filtering bridge. this patch includes selinux nlmsg_route_perms support for RTM_*TUNNEL api from Benjamin Poirier. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/rtnetlink.h | 9 +++++++++ 2 files changed, 35 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index be09d2ad4b5d..3dfc9ff2ec9b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -713,7 +713,32 @@ enum ipvlan_mode { #define IPVLAN_F_PRIVATE 0x01 #define IPVLAN_F_VEPA 0x02 +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 reserved1; + __u16 reserved2; + __u32 ifindex; +}; + /* VXLAN section */ +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, @@ -745,6 +770,7 @@ enum { IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 93d934cc4613..0970cb4b1b88 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -185,6 +185,13 @@ enum { RTM_GETNEXTHOPBUCKET, #define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -756,6 +763,8 @@ enum rtnetlink_groups { #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN RTNLGRP_MCTP_IFADDR, #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 445b2f36bb4efb81f064e931f28b9ec19f114355 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 1 Mar 2022 05:04:39 +0000 Subject: drivers: vxlan: vnifilter: add support for stats dumping Add support for VXLAN vni filter entries' stats dumping Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3dfc9ff2ec9b..e315e53125f4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -716,18 +716,41 @@ enum ipvlan_mode { /* Tunnel RTM header */ struct tunnel_msg { __u8 family; - __u8 reserved1; + __u8 flags; __u16 reserved2; __u32 ifindex; }; /* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + enum { VXLAN_VNIFILTER_ENTRY_UNSPEC, VXLAN_VNIFILTER_ENTRY_START, VXLAN_VNIFILTER_ENTRY_END, VXLAN_VNIFILTER_ENTRY_GROUP, VXLAN_VNIFILTER_ENTRY_GROUP6, + VXLAN_VNIFILTER_ENTRY_STATS, __VXLAN_VNIFILTER_ENTRY_MAX }; #define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) -- cgit v1.2.3 From cedd3614e5d9c80908099c19f8716714ce0610b1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 24 Feb 2022 11:06:54 +0530 Subject: perf: Add irq and exception return branch types This expands generic branch type classification by adding two more entries there in i.e irq and exception return. Also updates the x86 implementation to process X86_BR_IRET and X86_BR_IRQ records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1645681014-3346-1-git-send-email-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..7dc71768749d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -251,6 +251,8 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ PERF_BR_MAX, }; -- cgit v1.2.3 From 4f0bfdfd8323e5b461fc1042143a1097dba9fced Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Feb 2022 19:34:31 -0800 Subject: ELF: Properly redefine PT_GNU_* in terms of PT_LOOS The PT_GNU_* program header types are actually offsets from PT_LOOS, so redefine them as such, reorder them, and add the missing PT_GNU_RELRO. Cc: Eric Biederman Cc: Peter Collingbourne Cc: Catalin Marinas Cc: Dave Martin Signed-off-by: Kees Cook --- include/uapi/linux/elf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..6438d55529bf 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -35,10 +35,11 @@ typedef __s64 Elf64_Sxword; #define PT_HIOS 0x6fffffff /* OS-specific */ #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff -#define PT_GNU_EH_FRAME 0x6474e550 -#define PT_GNU_PROPERTY 0x6474e553 - +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) #define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + /* * Extended Numbering -- cgit v1.2.3 From dd0ca255f3d27a1bb43d8e9529fb3645f9a341a3 Mon Sep 17 00:00:00 2001 From: Daniel Braunwarth Date: Mon, 28 Feb 2022 14:30:28 +0100 Subject: if_ether.h: add PROFINET Ethertype Add the Ethertype for PROFINET protocol. Signed-off-by: Daniel Braunwarth Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index c0c2f3ed5729..4f4ed35a16db 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -86,6 +86,7 @@ * over Ethernet */ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ -- cgit v1.2.3 From cd73cda742fbe1f33ed7306c7a01aa64f4e6ebd5 Mon Sep 17 00:00:00 2001 From: Daniel Braunwarth Date: Mon, 28 Feb 2022 14:30:29 +0100 Subject: if_ether.h: add EtherCAT Ethertype Add the Ethertype for EtherCAT protocol. Signed-off-by: Daniel Braunwarth Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 4f4ed35a16db..1d0bccc3fa54 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -89,6 +89,7 @@ #define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ -- cgit v1.2.3 From d58b8a99cbb84c1eb3b3613d23c1a328695a9455 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 25 Feb 2022 12:33:51 -0500 Subject: drm/amdkfd: Add SMI add event helper To remove duplicate code, unify event message format and simplify new event add in the following patches. Use KFD_SMI_EVENT_MSG_SIZE to define msg size, the same size will be used in user space to alloc the msg receive buffer. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index baec5a41de3e..b40687bf1014 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -463,6 +463,7 @@ enum kfd_smi_event { }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) +#define KFD_SMI_EVENT_MSG_SIZE 96 struct kfd_ioctl_smi_events_args { __u32 gpuid; /* to KFD */ -- cgit v1.2.3 From 46efc97b73060823fdc18103a5e317a8327d44e1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:17 +0200 Subject: net: rtnetlink: RTM_GETSTATS: Allow filtering inside nests The filter_mask field of RTM_GETSTATS header determines which top-level attributes should be included in the netlink response. This saves processing time by only including the bits that the user cares about instead of always dumping everything. This is doubly important for HW-backed statistics that would typically require a trip to the device to fetch the stats. So far there was only one HW-backed stat suite per attribute. However, IFLA_STATS_LINK_OFFLOAD_XSTATS is a nest, and will gain a new stat suite in the following patches. It would therefore be advantageous to be able to filter within that nest, and select just one or the other HW-backed statistics suite. Extend rtnetlink so that RTM_GETSTATS permits attributes in the payload. The scheme is as follows: - RTM_GETSTATS - struct if_stats_msg - attr nest IFLA_STATS_GET_FILTERS - attr IFLA_STATS_LINK_OFFLOAD_XSTATS - u32 filter_mask This scheme reuses the existing enumerators by nesting them in a dedicated context attribute. This is covered by policies as usual, therefore a gradual opt-in is possible. Currently only IFLA_STATS_LINK_OFFLOAD_XSTATS nest has filtering enabled, because for the SW counters the issue does not seem to be that important. rtnl_offload_xstats_get_size() and _fill() are extended to observe the requested filters. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e315e53125f4..4d62ea6e1288 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1207,6 +1207,16 @@ enum { #define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. + */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + /* These are embedded into IFLA_STATS_LINK_XSTATS: * [IFLA_STATS_LINK_XSTATS] * -> [LINK_XSTATS_TYPE_xxx] -- cgit v1.2.3 From 9309f97aef6d8250bb484dabeac925c3a7c57716 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:20 +0200 Subject: net: dev: Add hardware stats support Offloading switch device drivers may be able to collect statistics of the traffic taking place in the HW datapath that pertains to a certain soft netdevice, such as VLAN. Add the necessary infrastructure to allow exposing these statistics to the offloaded netdevice in question. The API was shaped by the following considerations: - Collection of HW statistics is not free: there may be a finite number of counters, and the act of counting may have a performance impact. It is therefore necessary to allow toggling whether HW counting should be done for any particular SW netdevice. - As the drivers are loaded and removed, a particular device may get offloaded and unoffloaded again. At the same time, the statistics values need to stay monotonic (modulo the eventual 64-bit wraparound), increasing only to reflect traffic measured in the device. To that end, the netdevice keeps around a lazily-allocated copy of struct rtnl_link_stats64. Device drivers then contribute to the values kept therein at various points. Even as the driver goes away, the struct stays around to maintain the statistics values. - Different HW devices may be able to count different things. The motivation behind this patch in particular is exposure of HW counters on Nvidia Spectrum switches, where the only practical approach to counting traffic on offloaded soft netdevices currently is to use router interface counters, and count L3 traffic. Correspondingly that is the statistics suite added in this patch. Other devices may be able to measure different kinds of traffic, and for that reason, the APIs are built to allow uniform access to different statistics suites. - Because soft netdevices and offloading drivers are only loosely bound, a netdevice uses a notifier chain to communicate with the drivers. Several new notifiers, NETDEV_OFFLOAD_XSTATS_*, have been added to carry messages to the offloading drivers. - Devices can have various conditions for when a particular counter is available. As the device is configured and reconfigured, the device offload may become or cease being suitable for counter binding. A netdevice can use a notifier type NETDEV_OFFLOAD_XSTATS_REPORT_USED to ping offloading drivers and determine whether anyone currently implements a given statistics suite. This information can then be propagated to user space. When the driver decides to unoffload a netdevice, it can use a newly-added function, netdev_offload_xstats_report_delta(), to record outstanding collected statistics, before destroying the HW counter. This patch adds a helper, call_netdevice_notifiers_info_robust(), for dispatching a notifier with the possibility of unwind when one of the consumers bails. Given the wish to eventually get rid of the global notifier block altogether, this helper only invokes the per-netns notifier block. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4d62ea6e1288..ef6a62a2e15d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -245,6 +245,21 @@ struct rtnl_link_stats64 { __u64 rx_nohandler; }; +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. + */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; +}; + /* The struct should be in sync with struct ifmap */ struct rtnl_link_ifmap { __u64 mem_start; -- cgit v1.2.3 From 0e7788fd76222dba3229eada9162efab185923fc Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:21 +0200 Subject: net: rtnetlink: Add UAPI for obtaining L3 offload xstats Add a new IFLA_STATS_LINK_OFFLOAD_XSTATS child attribute, IFLA_OFFLOAD_XSTATS_L3_STATS, to carry statistics for traffic that takes place in a HW router. The offloaded HW stats are designed to allow per-netdevice enablement and disablement. Additionally, as a netdevice is configured, it may become or cease being suitable for binding of a HW counter. Both of these aspects need to be communicated to the userspace. To that end, add another child attribute, IFLA_OFFLOAD_XSTATS_HW_S_INFO: - attr nest IFLA_OFFLOAD_XSTATS_HW_S_INFO - attr nest IFLA_OFFLOAD_XSTATS_L3_STATS - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST - {0,1} as u8 - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED - {0,1} as u8 Thus this one attribute is a nest that can be used to carry information about various types of HW statistics, and indexing is very simply done by wrapping the information for a given statistics suite into the attribute that carries the suite is the RTM_GETSTATS query. At the same time, because _HW_S_INFO is nested directly below IFLA_STATS_LINK_OFFLOAD_XSTATS, it is possible through filtering to request only the metadata about individual statistics suites, without having to hit the HW to get the actual counters. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ef6a62a2e15d..b1031f481d2f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1249,10 +1249,21 @@ enum { enum { IFLA_OFFLOAD_XSTATS_UNSPEC, IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ __IFLA_OFFLOAD_XSTATS_MAX }; #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -- cgit v1.2.3 From 03ba35667091337d8e632cf4b814f1c1b914609b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:22 +0200 Subject: net: rtnetlink: Add RTM_SETSTATS The offloaded HW stats are designed to allow per-netdevice enablement and disablement. These stats are only accessible through RTM_GETSTATS, and therefore should be toggled by a RTM_SETSTATS message. Add it, and the necessary skeleton handler. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0970cb4b1b88..14462dc159fd 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -146,6 +146,8 @@ enum { #define RTM_NEWSTATS RTM_NEWSTATS RTM_GETSTATS = 94, #define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS RTM_NEWCACHEREPORT = 96, #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT -- cgit v1.2.3 From 5fd0b838efac16046509f7fb100455d0463b9687 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:23 +0200 Subject: net: rtnetlink: Add UAPI toggle for IFLA_OFFLOAD_XSTATS_L3_STATS The offloaded HW stats are designed to allow per-netdevice enablement and disablement. Add an attribute, IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, which should be carried by the RTM_SETSTATS message, and expresses a desire to toggle L3 offload xstats on or off. As part of the above, add an exported function rtnl_offload_xstats_notify() that drivers can use when they have installed or deinstalled the counters backing the HW stats. At this point, it is possible to enable, disable and query L3 offload xstats on netdevices. (However there is no driver actually implementing these.) Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + include/uapi/linux/rtnetlink.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b1031f481d2f..ddca20357e7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1227,6 +1227,7 @@ enum { IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with * a filter mask for the corresponding group. */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ __IFLA_STATS_GETSET_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 14462dc159fd..51530aade46e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -767,6 +767,8 @@ enum rtnetlink_groups { #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR RTNLGRP_TUNNEL, #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 115dcec65f61d53e25e1bed5e380468b30f98b14 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:18 +0200 Subject: vfio: Define device migration protocol v2 Replace the existing region based migration protocol with an ioctl based protocol. The two protocols have the same general semantic behaviors, but the way the data is transported is changed. This is the STOP_COPY portion of the new protocol, it defines the 5 states for basic stop and copy migration and the protocol to move the migration data in/out of the kernel. Compared to the clarification of the v1 protocol Alex proposed: https://lore.kernel.org/r/163909282574.728533.7460416142511440919.stgit@omen This has a few deliberate functional differences: - ERROR arcs allow the device function to remain unchanged. - The protocol is not required to return to the original state on transition failure. Instead userspace can execute an unwind back to the original state, reset, or do something else without needing kernel support. This simplifies the kernel design and should userspace choose a policy like always reset, avoids doing useless work in the kernel on error handling paths. - PRE_COPY is made optional, userspace must discover it before using it. This reflects the fact that the majority of drivers we are aware of right now will not implement PRE_COPY. - segmentation is not part of the data stream protocol, the receiver does not have to reproduce the framing boundaries. The hybrid FSM for the device_state is described as a Mealy machine by documenting each of the arcs the driver is required to implement. Defining the remaining set of old/new device_state transitions as 'combination transitions' which are naturally defined as taking multiple FSM arcs along the shortest path within the FSM's digraph allows a complete matrix of transitions. A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE is defined to replace writing to the device_state field in the region. This allows returning a brand new FD whenever the requested transition opens a data transfer session. The VFIO core code implements the new feature and provides a helper function to the driver. Using the helper the driver only has to implement 6 of the FSM arcs and the other combination transitions are elaborated consistently from those arcs. A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIGRATION is defined to report the capability for migration and indicate which set of states and arcs are supported by the device. The FSM provides a lot of flexibility to make backwards compatible extensions but the VFIO_DEVICE_FEATURE also allows for future breaking extensions for scenarios that cannot support even the basic STOP_COPY requirements. The VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE with the GET option (i.e. VFIO_DEVICE_FEATURE_GET) can be used to read the current migration state of the VFIO device. Data transfer sessions are now carried over a file descriptor, instead of the region. The FD functions for the lifetime of the data transfer session. read() and write() transfer the data with normal Linux stream FD semantics. This design allows future expansion to support poll(), io_uring, and other performance optimizations. The complicated mmap mode for data transfer is discarded as current qemu doesn't take meaningful advantage of it, and the new qemu implementation avoids substantially all the performance penalty of using a read() on the region. Link: https://lore.kernel.org/all/20220224142024.147653-10-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 174 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 161 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ef33ea002b0b..22ed358c04c5 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -605,25 +605,25 @@ struct vfio_region_gfx_edid { struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -#define VFIO_DEVICE_STATE_STOP (0) -#define VFIO_DEVICE_STATE_RUNNING (1 << 0) -#define VFIO_DEVICE_STATE_SAVING (1 << 1) -#define VFIO_DEVICE_STATE_RESUMING (1 << 2) -#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ - VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) +#define VFIO_DEVICE_STATE_V1_STOP (0) +#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \ + VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) #define VFIO_DEVICE_STATE_VALID(state) \ - (state & VFIO_DEVICE_STATE_RESUMING ? \ - (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + (state & VFIO_DEVICE_STATE_V1_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1) #define VFIO_DEVICE_STATE_IS_ERROR(state) \ - ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING)) + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING)) #define VFIO_DEVICE_STATE_SET_ERROR(state) \ - ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) __u32 reserved; __u64 pending_bytes; @@ -1002,6 +1002,154 @@ struct vfio_device_feature { */ #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) +/* + * Indicates the device can support the migration API through + * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and + * ERROR states are always supported. Support for additional states is + * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be + * set. + * + * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and + * RESUMING are supported. + */ +struct vfio_device_feature_migration { + __aligned_u64 flags; +#define VFIO_MIGRATION_STOP_COPY (1 << 0) +}; +#define VFIO_DEVICE_FEATURE_MIGRATION 1 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO + * device. The new state is supplied in device_state, see enum + * vfio_device_mig_state for details + * + * The kernel migration driver must fully transition the device to the new state + * value before the operation returns to the user. + * + * The kernel migration driver must not generate asynchronous device state + * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET + * ioctl as described above. + * + * If this function fails then current device_state may be the original + * operating state or some other state along the combination transition path. + * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt + * to return to the original state, or attempt to return to some other state + * such as RUNNING or STOP. + * + * If the new_state starts a new data transfer session then the FD associated + * with that session is returned in data_fd. The user is responsible to close + * this FD when it is finished. The user must consider the migration data stream + * carried over the FD to be opaque and must preserve the byte order of the + * stream. The user is not required to preserve buffer segmentation when writing + * the data stream during the RESUMING operation. + * + * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO + * device, data_fd will be -1. + */ +struct vfio_device_feature_mig_state { + __u32 device_state; /* From enum vfio_device_mig_state */ + __s32 data_fd; +}; +#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2 + +/* + * The device migration Finite State Machine is described by the enum + * vfio_device_mig_state. Some of the FSM arcs will create a migration data + * transfer session by returning a FD, in this case the migration data will + * flow over the FD using read() and write() as discussed below. + * + * There are 5 states to support VFIO_MIGRATION_STOP_COPY: + * RUNNING - The device is running normally + * STOP - The device does not change the internal or external state + * STOP_COPY - The device internal state can be read out + * RESUMING - The device is stopped and is loading a new internal state + * ERROR - The device has failed and must be reset + * + * The FSM takes actions on the arcs between FSM states. The driver implements + * the following behavior for the FSM arcs: + * + * RUNNING -> STOP + * STOP_COPY -> STOP + * While in STOP the device must stop the operation of the device. The device + * must not generate interrupts, DMA, or any other change to external state. + * It must not change its internal state. When stopped the device and kernel + * migration driver must accept and respond to interaction to support external + * subsystems in the STOP state, for example PCI MSI-X and PCI config space. + * Failure by the user to restrict device access while in STOP must not result + * in error conditions outside the user context (ex. host system faults). + * + * The STOP_COPY arc will terminate a data transfer session. + * + * RESUMING -> STOP + * Leaving RESUMING terminates a data transfer session and indicates the + * device should complete processing of the data delivered by write(). The + * kernel migration driver should complete the incorporation of data written + * to the data transfer FD into the device internal state and perform + * final validity and consistency checking of the new device state. If the + * user provided data is found to be incomplete, inconsistent, or otherwise + * invalid, the migration driver must fail the SET_STATE ioctl and + * optionally go to the ERROR state as described below. + * + * While in STOP the device has the same behavior as other STOP states + * described above. + * + * To abort a RESUMING session the device must be reset. + * + * STOP -> RUNNING + * While in RUNNING the device is fully operational, the device may generate + * interrupts, DMA, respond to MMIO, all vfio device regions are functional, + * and the device may advance its internal state. + * + * STOP -> STOP_COPY + * This arc begin the process of saving the device state and will return a + * new data_fd. + * + * While in the STOP_COPY state the device has the same behavior as STOP + * with the addition that the data transfers session continues to stream the + * migration state. End of stream on the FD indicates the entire device + * state has been transferred. + * + * The user should take steps to restrict access to vfio device regions while + * the device is in STOP_COPY or risk corruption of the device migration data + * stream. + * + * STOP -> RESUMING + * Entering the RESUMING state starts a process of restoring the device state + * and will return a new data_fd. The data stream fed into the data_fd should + * be taken from the data transfer output of a single FD during saving from + * a compatible device. The migration driver may alter/reset the internal + * device state for this arc if required to prepare the device to receive the + * migration data. + * + * any -> ERROR + * ERROR cannot be specified as a device state, however any transition request + * can be failed with an errno return and may then move the device_state into + * ERROR. In this case the device was unable to execute the requested arc and + * was also unable to restore the device to any valid device_state. + * To recover from ERROR VFIO_DEVICE_RESET must be used to return the + * device_state back to RUNNING. + * + * The remaining possible transitions are interpreted as combinations of the + * above FSM arcs. As there are multiple paths through the FSM arcs the path + * should be selected based on the following rules: + * - Select the shortest path. + * Refer to vfio_mig_get_next_state() for the result of the algorithm. + * + * The automatic transit through the FSM arcs that make up the combination + * transition is invisible to the user. When working with combination arcs the + * user may see any step along the path in the device_state if SET_STATE + * fails. When handling these types of errors users should anticipate future + * revisions of this protocol using new states and those states becoming + * visible in this case. + */ +enum vfio_device_mig_state { + VFIO_DEVICE_STATE_ERROR = 0, + VFIO_DEVICE_STATE_STOP = 1, + VFIO_DEVICE_STATE_RUNNING = 2, + VFIO_DEVICE_STATE_STOP_COPY = 3, + VFIO_DEVICE_STATE_RESUMING = 4, +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 8cb3d83b959be0631cd719b995c40c3cda21cd47 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:19 +0200 Subject: vfio: Extend the device migration protocol with RUNNING_P2P The RUNNING_P2P state is designed to support multiple devices in the same VM that are doing P2P transactions between themselves. When in RUNNING_P2P the device must be able to accept incoming P2P transactions but should not generate outgoing P2P transactions. As an optional extension to the mandatory states it is defined as in between STOP and RUNNING: STOP -> RUNNING_P2P -> RUNNING -> RUNNING_P2P -> STOP For drivers that are unable to support RUNNING_P2P the core code silently merges RUNNING_P2P and RUNNING together. Unless driver support is present, the new state cannot be used in SET_STATE. Drivers that support this will be required to implement 4 FSM arcs beyond the basic FSM. 2 of the basic FSM arcs become combination transitions. Compared to the v1 clarification, NDMA is redefined into FSM states and is described in terms of the desired P2P quiescent behavior, noting that halting all DMA is an acceptable implementation. Link: https://lore.kernel.org/all/20220224142024.147653-11-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 22ed358c04c5..26a66f68371d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1011,10 +1011,16 @@ struct vfio_device_feature { * * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and * RESUMING are supported. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P + * is supported in addition to the STOP_COPY states. + * + * Other combinations of flags have behavior to be defined in the future. */ struct vfio_device_feature_migration { __aligned_u64 flags; #define VFIO_MIGRATION_STOP_COPY (1 << 0) +#define VFIO_MIGRATION_P2P (1 << 1) }; #define VFIO_DEVICE_FEATURE_MIGRATION 1 @@ -1065,10 +1071,13 @@ struct vfio_device_feature_mig_state { * RESUMING - The device is stopped and is loading a new internal state * ERROR - The device has failed and must be reset * + * And 1 optional state to support VFIO_MIGRATION_P2P: + * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * * The FSM takes actions on the arcs between FSM states. The driver implements * the following behavior for the FSM arcs: * - * RUNNING -> STOP + * RUNNING_P2P -> STOP * STOP_COPY -> STOP * While in STOP the device must stop the operation of the device. The device * must not generate interrupts, DMA, or any other change to external state. @@ -1095,11 +1104,16 @@ struct vfio_device_feature_mig_state { * * To abort a RESUMING session the device must be reset. * - * STOP -> RUNNING + * RUNNING_P2P -> RUNNING * While in RUNNING the device is fully operational, the device may generate * interrupts, DMA, respond to MMIO, all vfio device regions are functional, * and the device may advance its internal state. * + * RUNNING -> RUNNING_P2P + * STOP -> RUNNING_P2P + * While in RUNNING_P2P the device is partially running in the P2P quiescent + * state defined below. + * * STOP -> STOP_COPY * This arc begin the process of saving the device state and will return a * new data_fd. @@ -1129,6 +1143,18 @@ struct vfio_device_feature_mig_state { * To recover from ERROR VFIO_DEVICE_RESET must be used to return the * device_state back to RUNNING. * + * The optional peer to peer (P2P) quiescent state is intended to be a quiescent + * state for the device for the purposes of managing multiple devices within a + * user context where peer-to-peer DMA between devices may be active. The + * RUNNING_P2P states must prevent the device from initiating + * any new P2P DMA transactions. If the device can identify P2P transactions + * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration + * driver must complete any such outstanding operations prior to completing the + * FSM arc into a P2P state. For the purpose of specification the states + * behave as though the device was fully running if not supported. Like while in + * STOP or STOP_COPY the user must not touch the device, otherwise the state + * can be exited. + * * The remaining possible transitions are interpreted as combinations of the * above FSM arcs. As there are multiple paths through the FSM arcs the path * should be selected based on the following rules: @@ -1141,6 +1167,11 @@ struct vfio_device_feature_mig_state { * fails. When handling these types of errors users should anticipate future * revisions of this protocol using new states and those states becoming * visible in this case. + * + * The optional states cannot be used with SET_STATE if the device does not + * support them. The user can discover if these states are supported by using + * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can + * avoid knowing about these optional states if the kernel driver supports them. */ enum vfio_device_mig_state { VFIO_DEVICE_STATE_ERROR = 0, @@ -1148,6 +1179,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING = 2, VFIO_DEVICE_STATE_STOP_COPY = 3, VFIO_DEVICE_STATE_RESUMING = 4, + VFIO_DEVICE_STATE_RUNNING_P2P = 5, }; /* -------- API for Type1 VFIO IOMMU -------- */ -- cgit v1.2.3 From 0f3f9cd7f752f6e685e378620babc5e34af6fb9f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:20 +0200 Subject: vfio: Remove migration protocol v1 documentation v1 was never implemented and is replaced by v2. The old uAPI documentation is removed from the header file. The old uAPI definitions are still kept in the header file to ease transition for userspace copying these headers. They will be fully removed down the road. Link: https://lore.kernel.org/all/20220224142024.147653-12-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 200 +--------------------------------------------- 1 file changed, 2 insertions(+), 198 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 26a66f68371d..fea86061b44e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -323,7 +323,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) -#define VFIO_REGION_TYPE_MIGRATION (3) +#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -405,203 +405,7 @@ struct vfio_region_gfx_edid { #define VFIO_REGION_SUBTYPE_CCW_CRW (3) /* sub-types for VFIO_REGION_TYPE_MIGRATION */ -#define VFIO_REGION_SUBTYPE_MIGRATION (1) - -/* - * The structure vfio_device_migration_info is placed at the 0th offset of - * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related - * migration information. Field accesses from this structure are only supported - * at their native width and alignment. Otherwise, the result is undefined and - * vendor drivers should return an error. - * - * device_state: (read/write) - * - The user application writes to this field to inform the vendor driver - * about the device state to be transitioned to. - * - The vendor driver should take the necessary actions to change the - * device state. After successful transition to a given state, the - * vendor driver should return success on write(device_state, state) - * system call. If the device state transition fails, the vendor driver - * should return an appropriate -errno for the fault condition. - * - On the user application side, if the device state transition fails, - * that is, if write(device_state, state) returns an error, read - * device_state again to determine the current state of the device from - * the vendor driver. - * - The vendor driver should return previous state of the device unless - * the vendor driver has encountered an internal error, in which case - * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. - * - The user application must use the device reset ioctl to recover the - * device from VFIO_DEVICE_STATE_ERROR state. If the device is - * indicated to be in a valid device state by reading device_state, the - * user application may attempt to transition the device to any valid - * state reachable from the current state or terminate itself. - * - * device_state consists of 3 bits: - * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, - * it indicates the _STOP state. When the device state is changed to - * _STOP, driver should stop the device before write() returns. - * - If bit 1 is set, it indicates the _SAVING state, which means that the - * driver should start gathering device state information that will be - * provided to the VFIO user application to save the device's state. - * - If bit 2 is set, it indicates the _RESUMING state, which means that - * the driver should prepare to resume the device. Data provided through - * the migration region should be used to resume the device. - * Bits 3 - 31 are reserved for future use. To preserve them, the user - * application should perform a read-modify-write operation on this - * field when modifying the specified bits. - * - * +------- _RESUMING - * |+------ _SAVING - * ||+----- _RUNNING - * ||| - * 000b => Device Stopped, not saving or resuming - * 001b => Device running, which is the default state - * 010b => Stop the device & save the device state, stop-and-copy state - * 011b => Device running and save the device state, pre-copy state - * 100b => Device stopped and the device state is resuming - * 101b => Invalid state - * 110b => Error state - * 111b => Invalid state - * - * State transitions: - * - * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP - * (100b) (001b) (011b) (010b) (000b) - * 0. Running or default state - * | - * - * 1. Normal Shutdown (optional) - * |------------------------------------->| - * - * 2. Save the state or suspend - * |------------------------->|---------->| - * - * 3. Save the state during live migration - * |----------->|------------>|---------->| - * - * 4. Resuming - * |<---------| - * - * 5. Resumed - * |--------->| - * - * 0. Default state of VFIO device is _RUNNING when the user application starts. - * 1. During normal shutdown of the user application, the user application may - * optionally change the VFIO device state from _RUNNING to _STOP. This - * transition is optional. The vendor driver must support this transition but - * must not require it. - * 2. When the user application saves state or suspends the application, the - * device state transitions from _RUNNING to stop-and-copy and then to _STOP. - * On state transition from _RUNNING to stop-and-copy, driver must stop the - * device, save the device state and send it to the application through the - * migration region. The sequence to be followed for such transition is given - * below. - * 3. In live migration of user application, the state transitions from _RUNNING - * to pre-copy, to stop-and-copy, and to _STOP. - * On state transition from _RUNNING to pre-copy, the driver should start - * gathering the device state while the application is still running and send - * the device state data to application through the migration region. - * On state transition from pre-copy to stop-and-copy, the driver must stop - * the device, save the device state and send it to the user application - * through the migration region. - * Vendor drivers must support the pre-copy state even for implementations - * where no data is provided to the user before the stop-and-copy state. The - * user must not be required to consume all migration data before the device - * transitions to a new state, including the stop-and-copy state. - * The sequence to be followed for above two transitions is given below. - * 4. To start the resuming phase, the device state should be transitioned from - * the _RUNNING to the _RESUMING state. - * In the _RESUMING state, the driver should use the device state data - * received through the migration region to resume the device. - * 5. After providing saved device data to the driver, the application should - * change the state from _RESUMING to _RUNNING. - * - * reserved: - * Reads on this field return zero and writes are ignored. - * - * pending_bytes: (read only) - * The number of pending bytes still to be migrated from the vendor driver. - * - * data_offset: (read only) - * The user application should read data_offset field from the migration - * region. The user application should read the device data from this - * offset within the migration region during the _SAVING state or write - * the device data during the _RESUMING state. See below for details of - * sequence to be followed. - * - * data_size: (read/write) - * The user application should read data_size to get the size in bytes of - * the data copied in the migration region during the _SAVING state and - * write the size in bytes of the data copied in the migration region - * during the _RESUMING state. - * - * The format of the migration region is as follows: - * ------------------------------------------------------------------ - * |vfio_device_migration_info| data section | - * | | /////////////////////////////// | - * ------------------------------------------------------------------ - * ^ ^ - * offset 0-trapped part data_offset - * - * The structure vfio_device_migration_info is always followed by the data - * section in the region, so data_offset will always be nonzero. The offset - * from where the data is copied is decided by the kernel driver. The data - * section can be trapped, mmapped, or partitioned, depending on how the kernel - * driver defines the data section. The data section partition can be defined - * as mapped by the sparse mmap capability. If mmapped, data_offset must be - * page aligned, whereas initial section which contains the - * vfio_device_migration_info structure, might not end at the offset, which is - * page aligned. The user is not required to access through mmap regardless - * of the capabilities of the region mmap. - * The vendor driver should determine whether and how to partition the data - * section. The vendor driver should return data_offset accordingly. - * - * The sequence to be followed while in pre-copy state and stop-and-copy state - * is as follows: - * a. Read pending_bytes, indicating the start of a new iteration to get device - * data. Repeated read on pending_bytes at this stage should have no side - * effects. - * If pending_bytes == 0, the user application should not iterate to get data - * for that device. - * If pending_bytes > 0, perform the following steps. - * b. Read data_offset, indicating that the vendor driver should make data - * available through the data section. The vendor driver should return this - * read operation only after data is available from (region + data_offset) - * to (region + data_offset + data_size). - * c. Read data_size, which is the amount of data in bytes available through - * the migration region. - * Read on data_offset and data_size should return the offset and size of - * the current buffer if the user application reads data_offset and - * data_size more than once here. - * d. Read data_size bytes of data from (region + data_offset) from the - * migration region. - * e. Process the data. - * f. Read pending_bytes, which indicates that the data from the previous - * iteration has been read. If pending_bytes > 0, go to step b. - * - * The user application can transition from the _SAVING|_RUNNING - * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the - * number of pending bytes. The user application should iterate in _SAVING - * (stop-and-copy) until pending_bytes is 0. - * - * The sequence to be followed while _RESUMING device state is as follows: - * While data for this device is available, repeat the following steps: - * a. Read data_offset from where the user application should write data. - * b. Write migration data starting at the migration region + data_offset for - * the length determined by data_size from the migration source. - * c. Write data_size, which indicates to the vendor driver that data is - * written in the migration region. Vendor driver must return this write - * operations on consuming data. Vendor driver should apply the - * user-provided migration region data to the device resume state. - * - * If an error occurs during the above sequences, the vendor driver can return - * an error code for next read() or write() operation, which will terminate the - * loop. The user application should then take the next necessary action, for - * example, failing migration or terminating the user application. - * - * For the user application, data is opaque. The user application should write - * data in the same order as the data is received and the data should be of - * same transaction size at the source. - */ +#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1) struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -- cgit v1.2.3 From 8d21ec0e46ed6e39994accff8eb4f2be3d2e76b5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:56:34 -0800 Subject: bpf: Add __sk_buff->delivery_time_type and bpf_skb_set_skb_delivery_time() * __sk_buff->delivery_time_type: This patch adds __sk_buff->delivery_time_type. It tells if the delivery_time is stored in __sk_buff->tstamp or not. It will be most useful for ingress to tell if the __sk_buff->tstamp has the (rcv) timestamp or delivery_time. If delivery_time_type is 0 (BPF_SKB_DELIVERY_TIME_NONE), it has the (rcv) timestamp. Two non-zero types are defined for the delivery_time_type, BPF_SKB_DELIVERY_TIME_MONO and BPF_SKB_DELIVERY_TIME_UNSPEC. For UNSPEC, it can only happen in egress because only mono delivery_time can be forwarded to ingress now. The clock of UNSPEC delivery_time can be deduced from the skb->sk->sk_clockid which is how the sch_etf doing it also. * Provide forwarded delivery_time to tc-bpf@ingress: With the help of the new delivery_time_type, the tc-bpf has a way to tell if the __sk_buff->tstamp has the (rcv) timestamp or the delivery_time. During bpf load time, the verifier will learn if the bpf prog has accessed the new __sk_buff->delivery_time_type. If it does, it means the tc-bpf@ingress is expecting the skb->tstamp could have the delivery_time. The kernel will then read the skb->tstamp as-is during bpf insn rewrite without checking the skb->mono_delivery_time. This is done by adding a new prog->delivery_time_access bit. The same goes for writing skb->tstamp. * bpf_skb_set_delivery_time(): The bpf_skb_set_delivery_time() helper is added to allow setting both delivery_time and the delivery_time_type at the same time. If the tc-bpf does not need to change the delivery_time_type, it can directly write to the __sk_buff->tstamp as the existing tc-bpf has already been doing. It will be most useful at ingress to change the __sk_buff->tstamp from the (rcv) timestamp to a mono delivery_time and then bpf_redirect_*(). bpf only has mono clock helper (bpf_ktime_get_ns), and the current known use case is the mono EDT for fq, and only mono delivery time can be kept during forward now, so bpf_skb_set_delivery_time() only supports setting BPF_SKB_DELIVERY_TIME_MONO. It can be extended later when use cases come up and the forwarding path also supports other clock bases. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index afe3d0d7f5f2..4eebea830613 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5086,6 +5086,37 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. + * + * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * Description + * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also + * change the __sk_buff->delivery_time_type to *dtime_type*. + * + * When setting a delivery time (non zero *dtime*) to + * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* + * is supported. It is the only delivery_time_type that will be + * kept after bpf_redirect_*(). + * + * If there is no need to change the __sk_buff->delivery_time_type, + * the delivery time can be directly written to __sk_buff->tstamp + * instead. + * + * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE + * can be used to clear any delivery time stored in + * __sk_buff->tstamp. + * + * Only IPv4 and IPv6 skb->protocol are supported. + * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. + * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5280,6 +5311,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ + FN(skb_set_delivery_time), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5469,6 +5501,12 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_DELIVERY_TIME_NONE, + BPF_SKB_DELIVERY_TIME_UNSPEC, + BPF_SKB_DELIVERY_TIME_MONO, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -5509,7 +5547,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u32 :32; /* Padding, future use. */ + __u8 delivery_time_type; + __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; -- cgit v1.2.3 From 51ef2be546e2e480e56fdb59fdeb9a4406e8d52e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 17 Feb 2022 16:44:07 +0100 Subject: media: i2c: isl7998x: Add driver for Intersil ISL7998x Add driver for the Intersil ISL7998x Analog to MIPI CSI-2/BT656 decoder. This chip supports 1/2/4 analog video inputs and converts them into 1/2/4 VCs in MIPI CSI2 stream. This driver currently supports ISL79987 and both 720x480 and 720x576 resolutions, however as per specification, all inputs must use the same resolution and standard. The only supported pixel format is now YUYV/YUV422. The chip should support RGB565 on the CSI2 as well, but this is currently unsupported. Signed-off-by: Marek Vasut Cc: Sakari Ailus Cc: Mauro Carvalho Chehab Cc: Rob Herring To: linux-media@vger.kernel.org Signed-off-by: Michael Tretter Acked-by: Hans Verkuil [Sakari Ailus: Always call pm_runtime_get_and_resume in pre_streamon] Signed-off-by: Sakari Ailus --- include/uapi/linux/v4l2-controls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c8e0f84d204d..92576ed03fc4 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -219,6 +219,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_ALLEGRO_BASE (V4L2_CID_USER_BASE + 0x1170) +/* + * The base for the isl7998x driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_ISL7998X_BASE (V4L2_CID_USER_BASE + 0x1180) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From bfa26ba343c727e055223be04e08f2ebdd43c293 Mon Sep 17 00:00:00 2001 From: William Mahon Date: Thu, 3 Mar 2022 18:23:42 -0800 Subject: HID: add mapping for KEY_DICTATE Numerous keyboards are adding dictate keys which allows for text messages to be dictated by a microphone. This patch adds a new key definition KEY_DICTATE and maps 0x0c/0x0d8 usage code to this new keycode. Additionally hid-debug is adjusted to recognize this new usage code as well. Signed-off-by: William Mahon Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220303021501.1.I5dbf50eb1a7a6734ee727bda4a8573358c6d3ec0@changeid Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 225ec87d4f22..4db5d41848e4 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -612,6 +612,7 @@ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ +#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- cgit v1.2.3 From 327b89f0acc4c20a06ed59e4d9af7f6d804dc2e2 Mon Sep 17 00:00:00 2001 From: William Mahon Date: Thu, 3 Mar 2022 18:26:22 -0800 Subject: HID: add mapping for KEY_ALL_APPLICATIONS This patch adds a new key definition for KEY_ALL_APPLICATIONS and aliases KEY_DASHBOARD to it. It also maps the 0x0c/0x2a2 usage code to KEY_ALL_APPLICATIONS. Signed-off-by: William Mahon Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220303035618.1.I3a7746ad05d270161a18334ae06e3b6db1a1d339@changeid Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4db5d41848e4..7989d9483ea7 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -278,7 +278,8 @@ #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 -#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ +#define KEY_DASHBOARD KEY_ALL_APPLICATIONS #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 -- cgit v1.2.3 From f7ddbf5581b474fe4a0a29244acaa1bf72234675 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 3 Mar 2022 16:52:15 -0800 Subject: drm/msm: Add SET_PARAM ioctl It was always expected to have a use for this some day, so we left a placeholder. Now we do. (And I expect another use in the not too distant future when we start allowing userspace to allocate GPU iova.) Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220304005317.776110-3-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 6b8fffc28a50..cf5de53836e7 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -67,16 +67,20 @@ struct drm_msm_timespec { __s64 tv_nsec; /* nanoseconds */ }; -#define MSM_PARAM_GPU_ID 0x01 -#define MSM_PARAM_GMEM_SIZE 0x02 -#define MSM_PARAM_CHIP_ID 0x03 -#define MSM_PARAM_MAX_FREQ 0x04 -#define MSM_PARAM_TIMESTAMP 0x05 -#define MSM_PARAM_GMEM_BASE 0x06 -#define MSM_PARAM_PRIORITIES 0x07 /* The # of priority levels */ -#define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */ -#define MSM_PARAM_FAULTS 0x09 -#define MSM_PARAM_SUSPENDS 0x0a +/* Below "RO" indicates a read-only param, "WO" indicates write-only, and + * "RW" indicates a param that can be both read (GET_PARAM) and written + * (SET_PARAM) + */ +#define MSM_PARAM_GPU_ID 0x01 /* RO */ +#define MSM_PARAM_GMEM_SIZE 0x02 /* RO */ +#define MSM_PARAM_CHIP_ID 0x03 /* RO */ +#define MSM_PARAM_MAX_FREQ 0x04 /* RO */ +#define MSM_PARAM_TIMESTAMP 0x05 /* RO */ +#define MSM_PARAM_GMEM_BASE 0x06 /* RO */ +#define MSM_PARAM_PRIORITIES 0x07 /* RO: The # of priority levels */ +#define MSM_PARAM_PP_PGTABLE 0x08 /* RO: Deprecated, always returns zero */ +#define MSM_PARAM_FAULTS 0x09 /* RO */ +#define MSM_PARAM_SUSPENDS 0x0a /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # @@ -333,9 +337,7 @@ struct drm_msm_submitqueue_query { }; #define DRM_MSM_GET_PARAM 0x00 -/* placeholder: #define DRM_MSM_SET_PARAM 0x01 - */ #define DRM_MSM_GEM_NEW 0x02 #define DRM_MSM_GEM_INFO 0x03 #define DRM_MSM_GEM_CPU_PREP 0x04 @@ -351,6 +353,7 @@ struct drm_msm_submitqueue_query { #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_SET_PARAM DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) #define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) #define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) -- cgit v1.2.3 From 90f45c42d7d7b0ec0fd797485c07fc421c474e12 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 3 Mar 2022 16:52:16 -0800 Subject: drm/msm: Add SYSPROF param (v2) Add a SYSPROF param for system profiling tools like Mesa's pps-producer (perfetto) to control behavior related to system-wide performance counter collection. In particular, for profiling, one wants to ensure that GPU context switches do not effect perfcounter state, and might want to suppress suspend (which would cause counters to lose state). v2: Swap the order in msm_file_private_set_sysprof() [sboyd] and initialize the sysprof_active refcount to one (because the under/ overflow checking in refcount_t doesn't expect a 0->1 transition) meaning that values greater than 1 means sysprof is active. Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220304005317.776110-4-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index cf5de53836e7..2ee03ba08681 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -81,6 +81,7 @@ struct drm_msm_timespec { #define MSM_PARAM_PP_PGTABLE 0x08 /* RO: Deprecated, always returns zero */ #define MSM_PARAM_FAULTS 0x09 /* RO */ #define MSM_PARAM_SUSPENDS 0x0a /* RO */ +#define MSM_PARAM_SYSPROF 0x0b /* WO: 1 preserves perfcntrs, 2 also disables suspend */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From 17154addc5c1a175bcf3441ff0d9598efa1f05cd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 24 Feb 2022 14:23:21 -0800 Subject: drm/msm: Add MSM_SUBMIT_FENCE_SN_IN Add a way for userspace to specify the sequence number fence used to track completion of the submit. As the seqno fence is simply an incrementing counter which is local to the submitqueue, it is easy for userspace to know the next value. This is useful for native userspace drivers in a vm guest, as the guest to host roundtrip can have high latency. Assigning the fence seqno in the guest userspace allows the guest to continue without waiting for response from the host. Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220224222321.60653-1-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2ee03ba08681..07efc8033492 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -232,6 +232,7 @@ struct drm_msm_gem_submit_bo { #define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ #define MSM_SUBMIT_SYNCOBJ_IN 0x08000000 /* enable input syncobj */ #define MSM_SUBMIT_SYNCOBJ_OUT 0x04000000 /* enable output syncobj */ +#define MSM_SUBMIT_FENCE_SN_IN 0x02000000 /* userspace passes in seqno fence */ #define MSM_SUBMIT_FLAGS ( \ MSM_SUBMIT_NO_IMPLICIT | \ MSM_SUBMIT_FENCE_FD_IN | \ @@ -239,6 +240,7 @@ struct drm_msm_gem_submit_bo { MSM_SUBMIT_SUDO | \ MSM_SUBMIT_SYNCOBJ_IN | \ MSM_SUBMIT_SYNCOBJ_OUT | \ + MSM_SUBMIT_FENCE_SN_IN | \ 0) #define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ @@ -258,7 +260,7 @@ struct drm_msm_gem_submit_syncobj { */ struct drm_msm_gem_submit { __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ - __u32 fence; /* out */ + __u32 fence; /* out (or in with MSM_SUBMIT_FENCE_SN_IN flag) */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ __u64 bos; /* in, ptr to array of submit_bo's */ -- cgit v1.2.3 From 96ba61ee5331eb6e2f4c2baeb994b9ceb01d8266 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 20 Feb 2022 21:46:16 +0100 Subject: media: v4l2-ctrls: Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags that are needed by NVIDIA Tegra video decoder. Userspace will have to set these flags in accordance to the type of a decoded frame. Reviewed-by: Nicolas Dufresne Signed-off-by: Dmitry Osipenko Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c8e0f84d204d..e3d48d571062 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1563,6 +1563,8 @@ struct v4l2_h264_dpb_entry { #define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC 0x01 #define V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC 0x02 #define V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD 0x04 +#define V4L2_H264_DECODE_PARAM_FLAG_PFRAME 0x08 +#define V4L2_H264_DECODE_PARAM_FLAG_BFRAME 0x10 #define V4L2_CID_STATELESS_H264_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 7) /** -- cgit v1.2.3 From 1092347165cf5ed1453c1f211641a859818a2828 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 9 Feb 2022 17:03:12 +0100 Subject: media: lirc: remove unused feature LIRC_CAN_SET_REC_DUTY_CYCLE There is no hardware which can filter input on the duty cycle, so no driver implements this. On top of that, LIRC_CAN_SET_REC_DUTY_CYCLE has the same value as LIRC_CAN_MEASURE_CARRIER (0x02000000). Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 21c69a6a100d..23b0f2c8ba81 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -73,7 +73,6 @@ #define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) #define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) #define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 #define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -- cgit v1.2.3 From 72a74c8f0a0df12c7d7ea012aa70d95152858dea Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Thu, 24 Feb 2022 11:10:00 +0800 Subject: media: add nv12m_8l128 and nv12m_10be_8l128 video format. nv12m_8l128 is 8-bit tiled nv12 format used by amphion decoder. nv12m_10be_8l128 is 10-bit tiled format used by amphion decoder. The tile size is 8x128 Signed-off-by: Ming Qian Signed-off-by: Shijie Qin Signed-off-by: Zhou Peng Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index df8b9c486ba1..3768a0a80830 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -632,6 +632,8 @@ struct v4l2_pix_format { /* Tiled YUV formats, non contiguous planes */ #define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ #define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ +#define V4L2_PIX_FMT_NV12M_8L128 v4l2_fourcc('N', 'A', '1', '2') /* Y/CbCr 4:2:0 8x128 tiles */ +#define V4L2_PIX_FMT_NV12M_10BE_8L128 v4l2_fourcc_be('N', 'T', '1', '2') /* Y/CbCr 4:2:0 10-bit 8x128 tiles */ /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ -- cgit v1.2.3 From d045b9eb95a9b611c483897a69e7285aefdc66d7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 7 Mar 2022 12:44:36 -0800 Subject: mptcp: introduce implicit endpoints In some edge scenarios, an MPTCP subflows can use a local address mapped by a "implicit" endpoint created by the in-kernel path manager. Such endpoints presence can be confusing, as it's creation is hard to track and will prevent the later endpoint creation from the user-space using the same address. Define a new endpoint flag to mark implicit endpoints and allow the user-space to replace implicit them with user-provided data at endpoint creation time. Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index f106a3941cdf..9690efedb5fa 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -81,6 +81,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) #define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) +#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4) enum { MPTCP_PM_CMD_UNSPEC, -- cgit v1.2.3 From b530e9e1063ed2b817eae7eec6ed2daa8be11608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Mar 2022 11:53:42 +0100 Subject: bpf: Add "live packet" mode for XDP in BPF_PROG_RUN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for running XDP programs through BPF_PROG_RUN in a mode that enables live packet processing of the resulting frames. Previous uses of BPF_PROG_RUN for XDP returned the XDP program return code and the modified packet data to userspace, which is useful for unit testing of XDP programs. The existing BPF_PROG_RUN for XDP allows userspace to set the ingress ifindex and RXQ number as part of the context object being passed to the kernel. This patch reuses that code, but adds a new mode with different semantics, which can be selected with the new BPF_F_TEST_XDP_LIVE_FRAMES flag. When running BPF_PROG_RUN in this mode, the XDP program return codes will be honoured: returning XDP_PASS will result in the frame being injected into the networking stack as if it came from the selected networking interface, while returning XDP_TX and XDP_REDIRECT will result in the frame being transmitted out that interface. XDP_TX is translated into an XDP_REDIRECT operation to the same interface, since the real XDP_TX action is only possible from within the network drivers themselves, not from the process context where BPF_PROG_RUN is executed. Internally, this new mode of operation creates a page pool instance while setting up the test run, and feeds pages from that into the XDP program. The setup cost of this is amortised over the number of repetitions specified by userspace. To support the performance testing use case, we further optimise the setup step so that all pages in the pool are pre-initialised with the packet data, and pre-computed context and xdp_frame objects stored at the start of each page. This makes it possible to entirely avoid touching the page content on each XDP program invocation, and enables sending up to 9 Mpps/core on my test box. Because the data pages are recycled by the page pool, and the test runner doesn't re-initialise them for each run, subsequent invocations of the XDP program will see the packet data in the state it was after the last time it ran on that particular page. This means that an XDP program that modifies the packet before redirecting it has to be careful about which assumptions it makes about the packet content, but that is only an issue for the most naively written programs. Enabling the new flag is only allowed when not setting ctx_out and data_out in the test specification, since using it means frames will be redirected somewhere else, so they can't be returned. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220309105346.100053-2-toke@redhat.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4eebea830613..bc23020b638d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1232,6 +1232,8 @@ enum { /* If set, run the test on the cpu specified by bpf_attr.test.cpu */ #define BPF_F_TEST_RUN_ON_CPU (1U << 0) +/* If set, XDP frames will be transmitted after processing */ +#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { @@ -1393,6 +1395,7 @@ union bpf_attr { __aligned_u64 ctx_out; __u32 flags; __u32 cpu; + __u32 batch_size; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ -- cgit v1.2.3 From 530e0d46c61314c59ecfdb8d3bcb87edbc0f85d3 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 9 Mar 2022 13:04:13 +0100 Subject: can: isotp: set default value for N_As to 50 micro seconds The N_As value describes the time a CAN frame needs on the wire when transmitted by the CAN controller. Even very short CAN FD frames need arround 100 usecs (bitrate 1Mbit/s, data bitrate 8Mbit/s). Having N_As to be zero (the former default) leads to 'no CAN frame separation' when STmin is set to zero by the receiving node. This 'burst mode' should not be enabled by default as it could potentially dump a high number of CAN frames into the netdev queue from the soft hrtimer context. This does not affect the system stability but is just not nice and cooperative. With this N_As/frame_txtime value the 'burst mode' is disabled by default. As user space applications usually do not set the frame_txtime element of struct can_isotp_options the new in-kernel default is very likely overwritten with zero when the sockopt() CAN_ISOTP_OPTS is invoked. To make sure that a N_As value of zero is only set intentional the value '0' is now interpreted as 'do not change the current value'. When a frame_txtime of zero is required for testing purposes this CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. Link: https://lore.kernel.org/all/20220309120416.83514-2-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index c55935b64ccc..590f8aea2b6d 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,20 +137,16 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ -/* default values */ +/* protocol machine default values */ #define CAN_ISOTP_DEFAULT_FLAGS 0 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ -#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 +#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro seconds */ #define CAN_ISOTP_DEFAULT_RECV_BS 0 #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 -#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU -#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN -#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 - /* * Remark on CAN_ISOTP_DEFAULT_RECV_* values: * @@ -162,4 +158,24 @@ struct can_isotp_ll_options { * consistency and copied directly into the flow control (FC) frame. */ +/* link layer default values => make use of Classical CAN frames */ + +#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 + +/* + * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as + * it only makes sense for isotp implementation tests to run without + * a N_As value. As user space applications usually do not set the + * frame_txtime element of struct can_isotp_options the new in-kernel + * default is very likely overwritten with zero when the sockopt() + * CAN_ISOTP_OPTS is invoked. + * To make sure that a N_As value of zero is only set intentional the + * value '0' is now interpreted as 'do not change the current value'. + * When a frame_txtime of zero is required for testing purposes this + * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. + */ +#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF + #endif /* !_UAPI_CAN_ISOTP_H */ -- cgit v1.2.3 From e7a6c00dc77aedf27a601738ea509f1caea6d673 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Mar 2022 08:22:22 -0700 Subject: io_uring: add support for registering ring file descriptors Lots of workloads use multiple threads, in which case the file table is shared between them. This makes getting and putting the ring file descriptor for each io_uring_enter(2) system call more expensive, as it involves an atomic get and put for each call. Similarly to how we allow registering normal file descriptors to avoid this overhead, add support for an io_uring_register(2) API that allows to register the ring fds themselves: 1) IORING_REGISTER_RING_FDS - takes an array of io_uring_rsrc_update structs, and registers them with the task. 2) IORING_UNREGISTER_RING_FDS - takes an array of io_uring_src_update structs, and unregisters them. When a ring fd is registered, it is internally represented by an offset. This offset is returned to the application, and the application then uses this offset and sets IORING_ENTER_REGISTERED_RING for the io_uring_enter(2) system call. This works just like using a registered file descriptor, rather than a real one, in an SQE, where IOSQE_FIXED_FILE gets set to tell io_uring that we're using an internal offset/descriptor rather than a real file descriptor. In initial testing, this provides a nice bump in performance for threaded applications in real world cases where the batch count (eg number of requests submitted per io_uring_enter(2) invocation) is low. In a microbenchmark, submitting NOP requests, we see the following increases in performance: Requests per syscall Baseline Registered Increase ---------------------------------------------------------------- 1 ~7030K ~8080K +15% 2 ~13120K ~14800K +13% 4 ~22740K ~25300K +11% Co-developed-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 787f491f0d2a..42b2fe84dbcd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -257,10 +257,11 @@ struct io_cqring_offsets { /* * io_uring_enter(2) flags */ -#define IORING_ENTER_GETEVENTS (1U << 0) -#define IORING_ENTER_SQ_WAKEUP (1U << 1) -#define IORING_ENTER_SQ_WAIT (1U << 2) -#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) +#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_REGISTERED_RING (1U << 4) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -325,6 +326,10 @@ enum { /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + /* register/unregister io_uring fd with the ring */ + IORING_REGISTER_RING_FDS = 20, + IORING_UNREGISTER_RING_FDS = 21, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 4f57f06ce2186c31c3da52386125dc57b1cd6f96 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2022 06:27:26 -0700 Subject: io_uring: add support for IORING_OP_MSG_RING command This adds support for IORING_OP_MSG_RING, which allows an SQE to signal another ring. That allows either waking up someone waiting on the ring, or even passing a 64-bit value via the user_data field in the CQE. sqe->fd must contain the fd of a ring that should receive the CQE. sqe->off will be propagated to the cqe->user_data on the target ring, and sqe->len will be propagated to cqe->res. The results CQE will have IORING_CQE_F_MSG set in its flags, to indicate that this CQE was generated from a messaging request rather than a SQE issued locally on that ring. This effectively allows passing a 64-bit and a 32-bit quantify between the two rings. This request type has the following request specific error cases: - -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is of the io_uring type. - -EOVERFLOW. Set if we were not able to deliver a request to the target ring. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 42b2fe84dbcd..8bd4bfdd9a89 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -143,6 +143,7 @@ enum { IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, + IORING_OP_MSG_RING, /* this goes last, obviously */ IORING_OP_LAST, @@ -199,9 +200,11 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries + * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) +#define IORING_CQE_F_MSG (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From 153474ba1a4aed0a7b797b4c2be8c35c7a4e57bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 11:46:37 -0600 Subject: ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h Rename tracehook_report_syscall_{entry,exit} to ptrace_report_syscall_{entry,exit} and place them in ptrace.h There is no longer any generic tracehook infractructure so make these ptrace specific functions ptrace specific. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-3-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 3747bf816f9a..b7af92e07d1f 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by tracehook_report_syscall_* to describe the current syscall-stop. + * by ptrace_report_syscall_* to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 -- cgit v1.2.3 From bcbb7bf6ccde7cb969a5642879832bc84ebf06a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2022 12:59:35 -0700 Subject: io_uring: allow submissions to continue on error By default, io_uring will stop submitting a batch of requests if we run into an error submitting a request. This isn't strictly necessary, as the error result is passed out-of-band via a CQE anyway. And it can be a bit confusing for some applications. Provide a way to setup a ring that will continue submitting on error, when the error CQE has been posted. There's still one case that will break out of submission. If we fail allocating a request, then we'll still return -ENOMEM. We could in theory post a CQE for that condition too even if we never got a request. Leave that for a potential followup. Reported-by: Dylan Yudaken Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8bd4bfdd9a89..d2be4eb22008 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -101,6 +101,7 @@ enum { #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ enum { IORING_OP_NOP, -- cgit v1.2.3 From 9bb984f28d5bcb917d35d930fcfb89f90f9449fd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Mar 2022 01:05:09 -0800 Subject: bpf: Remove BPF_SKB_DELIVERY_TIME_NONE and rename s/delivery_time_/tstamp_/ This patch is to simplify the uapi bpf.h regarding to the tstamp type and use a similar way as the kernel to describe the value stored in __sk_buff->tstamp. My earlier thought was to avoid describing the semantic and clock base for the rcv timestamp until there is more clarity on the use case, so the __sk_buff->delivery_time_type naming instead of __sk_buff->tstamp_type. With some thoughts, it can reuse the UNSPEC naming. This patch first removes BPF_SKB_DELIVERY_TIME_NONE and also rename BPF_SKB_DELIVERY_TIME_UNSPEC to BPF_SKB_TSTAMP_UNSPEC and BPF_SKB_DELIVERY_TIME_MONO to BPF_SKB_TSTAMP_DELIVERY_MONO. The semantic of BPF_SKB_TSTAMP_DELIVERY_MONO is the same: __sk_buff->tstamp has delivery time in mono clock base. BPF_SKB_TSTAMP_UNSPEC means __sk_buff->tstamp has the (rcv) tstamp at ingress and the delivery time at egress. At egress, the clock base could be found from skb->sk->sk_clockid. __sk_buff->tstamp == 0 naturally means NONE, so NONE is not needed. With BPF_SKB_TSTAMP_UNSPEC for the rcv tstamp at ingress, the __sk_buff->delivery_time_type is also renamed to __sk_buff->tstamp_type which was also suggested in the earlier discussion: https://lore.kernel.org/bpf/b181acbe-caf8-502d-4b7b-7d96b9fc5d55@iogearbox.net/ The above will then make __sk_buff->tstamp and __sk_buff->tstamp_type the same as its kernel skb->tstamp and skb->mono_delivery_time counter part. The internal kernel function bpf_skb_convert_dtime_type_read() is then renamed to bpf_skb_convert_tstamp_type_read() and it can be simplified with the BPF_SKB_DELIVERY_TIME_NONE gone. A BPF_ALU32_IMM(BPF_AND) insn is also saved by using BPF_JMP32_IMM(BPF_JSET). The bpf helper bpf_skb_set_delivery_time() is also renamed to bpf_skb_set_tstamp(). The arg name is changed from dtime to tstamp also. It only allows setting tstamp 0 for BPF_SKB_TSTAMP_UNSPEC and it could be relaxed later if there is use case to change mono delivery time to non mono. prog->delivery_time_access is also renamed to prog->tstamp_type_access. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220309090509.3712315-1-kafai@fb.com --- include/uapi/linux/bpf.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc23020b638d..d288a0a9f797 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5090,23 +5090,22 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. * - * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type) * Description - * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also - * change the __sk_buff->delivery_time_type to *dtime_type*. + * Change the __sk_buff->tstamp_type to *tstamp_type* + * and set *tstamp* to the __sk_buff->tstamp together. * - * When setting a delivery time (non zero *dtime*) to - * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* - * is supported. It is the only delivery_time_type that will be - * kept after bpf_redirect_*(). - * - * If there is no need to change the __sk_buff->delivery_time_type, - * the delivery time can be directly written to __sk_buff->tstamp + * If there is no need to change the __sk_buff->tstamp_type, + * the tstamp value can be directly written to __sk_buff->tstamp * instead. * - * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE - * can be used to clear any delivery time stored in - * __sk_buff->tstamp. + * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that + * will be kept during bpf_redirect_*(). A non zero + * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO + * *tstamp_type*. + * + * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used + * with a zero *tstamp*. * * Only IPv4 and IPv6 skb->protocol are supported. * @@ -5119,7 +5118,7 @@ union bpf_attr { * Return * 0 on success. * **-EINVAL** for invalid input - * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol + * **-EOPNOTSUPP** for unsupported protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5314,7 +5313,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ - FN(skb_set_delivery_time), \ + FN(skb_set_tstamp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5505,9 +5504,12 @@ union { \ } __attribute__((aligned(8))) enum { - BPF_SKB_DELIVERY_TIME_NONE, - BPF_SKB_DELIVERY_TIME_UNSPEC, - BPF_SKB_DELIVERY_TIME_MONO, + BPF_SKB_TSTAMP_UNSPEC, + BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ + /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, + * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC + * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + */ }; /* user accessible mirror of in-kernel sk_buff. @@ -5550,7 +5552,7 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u8 delivery_time_type; + __u8 tstamp_type; __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; -- cgit v1.2.3 From 58617014405ad5c9f94f464444f4972dabb71ca7 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 10 Mar 2022 23:53:35 +0800 Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup() Fix the descriptions of the return values of helper bpf_current_task_under_cgroup(). Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Hengqi Chen Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220310155335.1278783-1-hengqi.chen@gmail.com --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d288a0a9f797..e9978a916c3e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2302,8 +2302,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) -- cgit v1.2.3 From 174b16946e39ebd369097e0f773536c91a8c1a4c Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 2 Mar 2022 12:13:58 +0100 Subject: bpf-lsm: Introduce new helper bpf_ima_file_hash() ima_file_hash() has been modified to calculate the measurement of a file on demand, if it has not been already performed by IMA or the measurement is not fresh. For compatibility reasons, ima_inode_hash() remains unchanged. Keep the same approach in eBPF and introduce the new helper bpf_ima_file_hash() to take advantage of the modified behavior of ima_file_hash(). Signed-off-by: Roberto Sassu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220302111404.193900-4-roberto.sassu@huawei.com --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e9978a916c3e..99fab54ae9c0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5119,6 +5119,16 @@ union bpf_attr { * 0 on success. * **-EINVAL** for invalid input * **-EOPNOTSUPP** for unsupported protocol + * + * long bpf_ima_file_hash(struct file *file, void *dst, u32 size) + * Description + * Returns a calculated IMA hash of the *file*. + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5314,6 +5324,7 @@ union bpf_attr { FN(xdp_store_bytes), \ FN(copy_from_user_task), \ FN(skb_set_tstamp), \ + FN(ima_file_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 9 Mar 2022 23:20:33 +0100 Subject: net: openvswitch: fix uAPI incompatibility with existing user space Few years ago OVS user space made a strange choice in the commit [1] to define types only valid for the user space inside the copy of a kernel uAPI header. '#ifndef __KERNEL__' and another attribute was added later. This leads to the inevitable clash between user space and kernel types when the kernel uAPI is extended. The issue was unveiled with the addition of a new type for IPv6 extension header in kernel uAPI. When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the older user space application, application tries to parse it as OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as malformed. Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with every IPv6 packet that goes to the user space, IPv6 support is fully broken. Fixing that by bringing these user space attributes to the kernel uAPI to avoid the clash. Strictly speaking this is not the problem of the kernel uAPI, but changing it is the only way to avoid breakage of the older user space applications at this point. These 2 types are explicitly rejected now since they should not be passed to the kernel. Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved out from the '#ifdef __KERNEL__' as there is no good reason to hide it from the userspace. And it's also explicitly rejected now, because it's for in-kernel use only. Comments with warnings were added to avoid the problem coming back. (1 << type) converted to (1ULL << type) to avoid integer overflow on OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now. [1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline") Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support") Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c Reported-by: Roi Dayan Signed-off-by: Ilya Maximets Acked-by: Nicolas Dichtel Acked-by: Aaron Conole Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 9d1710f20505..ce3e1738d427 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,11 +351,21 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ - OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ -#ifdef __KERNEL__ - OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ -#endif + /* User space decided to squat on types 29 and 30. They are defined + * below, but should not be sent to the kernel. + * + * WARNING: No new types should be added unless they are defined + * for both kernel and user space (no 'ifdef's). It's hard + * to keep compatibility otherwise. + */ + OVS_KEY_ATTR_PACKET_TYPE, /* be32 packet type */ + OVS_KEY_ATTR_ND_EXTENSIONS, /* IPv6 Neighbor Discovery extensions */ + + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info. + * For in-kernel use only. + */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ __OVS_KEY_ATTR_MAX }; -- cgit v1.2.3 From 2916b7a9c7c25ecf9be2f37e567a277e861f8e3f Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Tue, 22 Feb 2022 20:36:39 +0530 Subject: nl80211: fix typo of NL80211_IF_TYPE_OCB in documentation It should be NL80211_IFTYPE_OCB instead. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1645542399-4680-1-git-send-email-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98ed52663d6b..0568a79097b8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3259,7 +3259,7 @@ enum nl80211_attrs { * and therefore can't be created in the normal ways, use the * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE * commands to create and destroy one - * @NL80211_IF_TYPE_OCB: Outside Context of a BSS + * @NL80211_IFTYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) * @NL80211_IFTYPE_MAX: highest interface type number currently defined -- cgit v1.2.3 From b20dc3c684580ddc07eb48ee3c3dc7597cd5eebf Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:42 +0100 Subject: gtp: Allow to create GTP device without FDs Currently, when the user wants to create GTP device, he has to provide file handles to the sockets created in userspace (IFLA_GTP_FD0, IFLA_GTP_FD1). This behaviour is not ideal, considering the option of adding support for GTP device creation through ip link. Ip link application is not a good place to create such sockets. This patch allows to create GTP device without providing IFLA_GTP_FD0 and IFLA_GTP_FD1 arguments. If the user sets IFLA_GTP_CREATE_SOCKETS attribute, then GTP module takes care of creating UDP sockets by itself. Sockets are created with the commonly known UDP ports used for GTP protocol (GTP0_PORT and GTP1U_PORT). In this case we don't have to provide encap_destroy because no extra deinitialization is needed, everything is covered by udp_tunnel_sock_release. Note: GTP instance created with only this change applied, does not handle GTP Echo Requests. This is implemented in the following patch. Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ddca20357e7e..ebd2aa3ef809 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -887,6 +887,7 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_CREATE_SOCKETS, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From 9af41cc33471ea1efa6f77e188f055cc77d0a5c5 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:43 +0100 Subject: gtp: Implement GTP echo response Adding GTP device through ip link creates the situation where there is no userspace daemon which would handle GTP messages (Echo Request for example). GTP-U instance which would not respond to echo requests would violate GTP specification. When GTP packet arrives with GTP_ECHO_REQ message type, GTP_ECHO_RSP is send to the sender. GTP_ECHO_RSP message should contain information element with GTPIE_RECOVERY tag and restart counter value. For GTPv1 restart counter is not used and should be equal to 0, for GTPv0 restart counter contains information provided from userspace(IFLA_GTP_RESTART_COUNT). Signed-off-by: Wojciech Drewek Suggested-by: Harald Welte Reviewed-by: Harald Welte Tested-by: Harald Welte Signed-off-by: Tony Nguyen --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ebd2aa3ef809..bd24c7dc10a2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -888,6 +888,7 @@ enum { IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, IFLA_GTP_CREATE_SOCKETS, + IFLA_GTP_RESTART_COUNT, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From d33bd757d362699cfce3c68b53cd12b947d196f4 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:44 +0100 Subject: gtp: Implement GTP echo request Adding GTP device through ip link creates the situation where GTP instance is not able to send GTP echo requests. Echo requests are used to check if GTP peer is still alive. With this patch, gtp_genl_ops are extended by new cmd (GTP_CMD_ECHOREQ) which allows to send echo request in the given version of GTP protocol (v0 or v1), from the given ms address to he given peer. TID is not inclued because in all path management messages it should be equal to 0. When GTP echo response is detected, multicast message is send to everyone in the gtp_genl_family. Message contains GTP version, ms address and peer address. Suggested-by: Harald Welte Signed-off-by: Wojciech Drewek Reviewed-by: Harald Welte Signed-off-by: Tony Nguyen --- include/uapi/linux/gtp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 79f9191bbb24..2f61298a7b77 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -8,6 +8,7 @@ enum gtp_genl_cmds { GTP_CMD_NEWPDP, GTP_CMD_DELPDP, GTP_CMD_GETPDP, + GTP_CMD_ECHOREQ, GTP_CMD_MAX, }; -- cgit v1.2.3 From e3acda7ade0a36c5cbebc2b54d30b7f08a4ba29b Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:45 +0100 Subject: net/sched: Allow flower to match on GTP options Options are as follows: PDU_TYPE:QFI and they refernce to the fields from the PDU Session Protocol. PDU Session data is conveyed in GTP-U Extension Header. GTP-U Extension Header is described in 3GPP TS 29.281. PDU Session Protocol is described in 3GPP TS 38.415. PDU_TYPE - indicates the type of the PDU Session Information (4 bits) QFI - QoS Flow Identifier (6 bits) # ip link add gtp_dev type gtp role sgsn # tc qdisc add dev gtp_dev ingress # tc filter add dev gtp_dev protocol ip parent ffff: \ flower \ enc_key_id 11 \ gtp_opts 1:8/ff:ff \ action mirred egress redirect dev eth0 Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/uapi/linux/if_tunnel.h | 4 +++- include/uapi/linux/pkt_cls.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 7d9105533c7b..102119628ff5 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -176,8 +176,10 @@ enum { #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) #define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) +#define TUNNEL_GTP_OPT __cpu_to_be16(0x8000) #define TUNNEL_OPTIONS_PRESENT \ - (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \ + TUNNEL_GTP_OPT) #endif /* _UAPI_IF_TUNNEL_H_ */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ee38b35c3f57..404f97fb239c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -616,6 +616,10 @@ enum { * TCA_FLOWER_KEY_ENC_OPT_ERSPAN_ * attributes */ + TCA_FLOWER_KEY_ENC_OPTS_GTP, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GTP_ + * attributes + */ __TCA_FLOWER_KEY_ENC_OPTS_MAX, }; @@ -654,6 +658,17 @@ enum { #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \ (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1) +enum { + TCA_FLOWER_KEY_ENC_OPT_GTP_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GTP_QFI, /* u8 */ + + __TCA_FLOWER_KEY_ENC_OPT_GTP_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GTP_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GTP_MAX - 1) + enum { TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC, TCA_FLOWER_KEY_MPLS_OPTS_LSE, -- cgit v1.2.3 From 3b6c6c039707f6bb7c64af2aa82a437fabb93aee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2022 19:49:48 -0800 Subject: nvdimm/region: Delete nd_blk_region infrastructure Now that the nd_namespace_blk infrastructure is removed, delete all the region machinery to coordinate provisioning aliased capacity between PMEM and BLK. Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/164688418803.2879318.1302315202397235855.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 8cf1e4884fd5..17e02b64ea2e 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -189,7 +189,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ #define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ #define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ -#define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ #define ND_DEVICE_DAX_PMEM 7 /* Device DAX interface to pmem */ enum nd_driver_flags { @@ -198,7 +197,6 @@ enum nd_driver_flags { ND_DRIVER_REGION_BLK = 1 << ND_DEVICE_REGION_BLK, ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, - ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; -- cgit v1.2.3 From 8109517b394e6deab5fd21cc5460e82ffed229c6 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 24 Jan 2022 11:25:24 +0100 Subject: rpmsg: ctrl: Introduce new RPMSG_CREATE/RELEASE_DEV_IOCTL controls Allow the user space application to create and release an rpmsg device by adding RPMSG_CREATE_DEV_IOCTL and RPMSG_RELEASE_DEV_IOCTL ioctrls to the /dev/rpmsg_ctrl interface The RPMSG_CREATE_DEV_IOCTL Ioctl can be used to instantiate a local rpmsg device. Depending on the back-end implementation, the associated rpmsg driver is probed and a NS announcement can be sent to the remote processor. The RPMSG_RELEASE_DEV_IOCTL allows the user application to release a rpmsg device created either by the remote processor or with the RPMSG_CREATE_DEV_IOCTL call. Depending on the back-end implementation, the associated rpmsg driver is removed and a NS destroy rpmsg can be sent to the remote processor. Suggested-by: Mathieu Poirier Signed-off-by: Arnaud Pouliquen Reviewed-by: Mathieu Poirier Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220124102524.295783-12-arnaud.pouliquen@foss.st.com --- include/uapi/linux/rpmsg.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rpmsg.h b/include/uapi/linux/rpmsg.h index f5ca8740f3fb..1637e68177d9 100644 --- a/include/uapi/linux/rpmsg.h +++ b/include/uapi/linux/rpmsg.h @@ -33,4 +33,14 @@ struct rpmsg_endpoint_info { */ #define RPMSG_DESTROY_EPT_IOCTL _IO(0xb5, 0x2) +/** + * Instantiate a new local rpmsg service device. + */ +#define RPMSG_CREATE_DEV_IOCTL _IOW(0xb5, 0x3, struct rpmsg_endpoint_info) + +/** + * Release a local rpmsg device. + */ +#define RPMSG_RELEASE_DEV_IOCTL _IOW(0xb5, 0x4, struct rpmsg_endpoint_info) + #endif -- cgit v1.2.3 From b5fdf66f6eb2560784c6f60131dc567de06267dc Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 1 Mar 2022 14:37:27 -0500 Subject: NFS: Remove remaining dfprintks related to fscache and remove NFSDBG_FSCACHE The fscache cookie APIs including fscache_acquire_cookie() and fscache_relinquish_cookie() now have very good tracing. Thus, there is no real need for dfprintks in the NFS fscache interface. The NFS fscache interface has removed all dfprintks so remove the NFSDBG_FSCACHE defines. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h index 3afe3767c55d..ae0de165c014 100644 --- a/include/uapi/linux/nfs_fs.h +++ b/include/uapi/linux/nfs_fs.h @@ -52,7 +52,7 @@ #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 #define NFSDBG_MOUNT 0x0400 -#define NFSDBG_FSCACHE 0x0800 +#define NFSDBG_FSCACHE 0x0800 /* unused */ #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_STATE 0x4000 -- cgit v1.2.3 From 2c7d2a230237e7c43fa067d695937b7e484bb92a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:39:58 -0500 Subject: btrfs: add definition for EXTENT_TREE_V2 This adds the initial definition of the EXTENT_TREE_V2 incompat feature flag. This also hides the support behind CONFIG_BTRFS_DEBUG. THIS IS A IN DEVELOPMENT FORMAT CHANGE, DO NOT USE UNLESS YOU ARE A DEVELOPER OR A TESTER. The format is in flux and will be added in stages, any fs will need to be re-made between updates to the format. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 738619994e26..1cb1a3860f1d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) #define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) +#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) struct btrfs_ioctl_feature_flags { __u64 compat_flags; -- cgit v1.2.3 From 9c54e80ddc6bd89596a4046d451908700476fd14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:40:07 -0500 Subject: btrfs: add code to support the block group root This code adds the on disk structures for the block group root, which will hold the block group items for extent tree v2. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 5416f1f1a77a..b069752a8ecf 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -53,6 +53,9 @@ /* tracks free space in block groups. */ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* Holds the block group items for extent tree v2. */ +#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL + /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL -- cgit v1.2.3 From dcb77a9ae87dc1ae2c54ea2e629da357e694b664 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 15:58:29 -0700 Subject: btrfs: add definitions and documentation for encoded I/O ioctls In order to allow sending and receiving compressed data without decompressing it, we need an interface to write pre-compressed data directly to the filesystem and the matching interface to read compressed data without decompressing it. This adds the definitions for ioctls to do that and detailed explanations of how to use them. Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 1cb1a3860f1d..d956b2993970 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -869,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args { __u8 align[7]; }; +/* + * Data and metadata for an encoded read or write. + * + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g., + * compression). This can be used to read the compressed contents of a file or + * write pre-compressed data directly to a file. + * + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially + * preadv/pwritev with additional metadata about how the data is encoded and the + * size of the unencoded data. + * + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills + * the metadata fields, and returns the size of the encoded data. It reads one + * extent per call. It can also read data which is not encoded. + * + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data + * from the iovecs, and returns the size of the encoded data. Note that the + * encoded data is not validated when it is written; if it is not valid (e.g., + * it cannot be decompressed), then a subsequent read may return an error. + * + * Since the filesystem page cache contains decoded data, encoded I/O bypasses + * the page cache. Encoded I/O requires CAP_SYS_ADMIN. + */ +struct btrfs_ioctl_encoded_io_args { + /* Input parameters for both reads and writes. */ + + /* + * iovecs containing encoded data. + * + * For reads, if the size of the encoded data is larger than the sum of + * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with + * ENOBUFS. + * + * For writes, the size of the encoded data is the sum of iov[n].iov_len + * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may + * increase in the future). This must also be less than or equal to + * unencoded_len. + */ + const struct iovec __user *iov; + /* Number of iovecs. */ + unsigned long iovcnt; + /* + * Offset in file. + * + * For writes, must be aligned to the sector size of the filesystem. + */ + __s64 offset; + /* Currently must be zero. */ + __u64 flags; + + /* + * For reads, the following members are output parameters that will + * contain the returned metadata for the encoded data. + * For writes, the following members must be set to the metadata for the + * encoded data. + */ + + /* + * Length of the data in the file. + * + * Must be less than or equal to unencoded_len - unencoded_offset. For + * writes, must be aligned to the sector size of the filesystem unless + * the data ends at or beyond the current end of the file. + */ + __u64 len; + /* + * Length of the unencoded (i.e., decrypted and decompressed) data. + * + * For writes, must be no more than 128 KiB (this limit may increase in + * the future). If the unencoded data is actually longer than + * unencoded_len, then it is truncated; if it is shorter, then it is + * extended with zeroes. + */ + __u64 unencoded_len; + /* + * Offset from the first byte of the unencoded data to the first byte of + * logical data in the file. + * + * Must be less than unencoded_len. + */ + __u64 unencoded_offset; + /* + * BTRFS_ENCODED_IO_COMPRESSION_* type. + * + * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE. + */ + __u32 compression; + /* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */ + __u32 encryption; + /* + * Reserved for future expansion. + * + * For reads, always returned as zero. Users should check for non-zero + * bytes. If there are any, then the kernel has a newer version of this + * structure with additional information that the user definition is + * missing. + * + * For writes, must be zeroed. + */ + __u8 reserved[64]; +}; + +/* Data is not compressed. */ +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0 +/* Data is compressed as a single zlib stream. */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1 +/* + * Data is compressed as a single zstd frame with the windowLog compression + * parameter set to no more than 17. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2 +/* + * Data is compressed sector by sector (using the sector size indicated by the + * name of the constant) with LZO1X and wrapped in the format documented in + * fs/btrfs/lzo.c. For writes, the compression sector size must match the + * filesystem sector size. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7 +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8 + +/* Data is not encrypted. */ +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 + /* Error codes as returned by the kernel */ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, @@ -997,5 +1125,9 @@ enum btrfs_err_code { struct btrfs_ioctl_ino_lookup_user_args) #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From 65722ff6181aa52c3d5b0929004af22a3a63e148 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 8 Mar 2022 14:00:50 -0500 Subject: drm/amdkfd: CRIU export dmabuf handles for GTT BOs Export dmabuf handles for GTT BOs so that their contents can be accessed using SDMA during checkpoint/restore. v2: Squash in fix from David to set dmabuf handle to invalid for BOs that cannot be accessed using SDMA during checkpoint/restore. Signed-off-by: David Yat Sin Reviewed-by : Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b40687bf1014..42975e940758 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -33,9 +33,10 @@ * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API * - 1.7 - Checkpoint Restore (CRIU) API + * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 7 +#define KFD_IOCTL_MINOR_VERSION 8 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -195,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args { __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; +#define KFD_INVALID_FD 0xffffffff + /* Matching HSA_EVENTTYPE */ #define KFD_IOC_EVENT_SIGNAL 0 #define KFD_IOC_EVENT_NODECHANGE 1 -- cgit v1.2.3 From 435fe1c0c1f74b682dba85641406abf4337aade6 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Wed, 16 Mar 2022 08:15:57 +0200 Subject: net: geneve: support IPv4/IPv6 as inner protocol This patch adds support for encapsulating IPv4/IPv6 within GENEVE. In order to use this, a new IFLA_GENEVE_INNER_PROTO_INHERIT flag needs to be provided at device creation. This property cannot be changed for the time being. In case IP traffic is received on a non-tun device the drop count is increased. Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20220316061557.431872-1-eyal.birger@gmail.com Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bd24c7dc10a2..cc284c048e69 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -842,6 +842,7 @@ enum { IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From ec7328b59176227216c461601c6bd0e922232a9b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:43 +0100 Subject: net: bridge: mst: Multiple Spanning Tree (MST) mode Allow the user to switch from the current per-VLAN STP mode to an MST mode. Up to this point, per-VLAN STP states where always isolated from each other. This is in contrast to the MSTP standard (802.1Q-2018, Clause 13.5), where VLANs are grouped into MST instances (MSTIs), and the state is managed on a per-MSTI level, rather that at the per-VLAN level. Perhaps due to the prevalence of the standard, many switching ASICs are built after the same model. Therefore, add a corresponding MST mode to the bridge, which we can later add offloading support for in a straight-forward way. For now, all VLANs are fixed to MSTI 0, also called the Common Spanning Tree (CST). That is, all VLANs will follow the port-global state. Upcoming changes will make this actually useful by allowing VLANs to be mapped to arbitrary MSTIs and allow individual MSTI states to be changed. Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2711c3522010..30a242195ced 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -759,6 +759,7 @@ struct br_mcast_stats { enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MCAST_VLAN_SNOOPING, + BR_BOOLOPT_MST_ENABLE, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From 8c678d60562f3e5f6d0a5f5465e27930ffedb8ca Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:44 +0100 Subject: net: bridge: mst: Allow changing a VLAN's MSTI Allow a VLAN to move out of the CST (MSTI 0), to an independent tree. The user manages the VID to MSTI mappings via a global VLAN setting. The proposed iproute2 interface would be: bridge vlan global set dev br0 vid msti Changing the state in non-zero MSTIs is still not supported, but will be addressed in upcoming changes. Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 30a242195ced..f60244b747ae 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -564,6 +564,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, + BRIDGE_VLANDB_GOPTS_MSTI, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 122c29486e1ff78033c45d0d31c710e7dc8945a5 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:45 +0100 Subject: net: bridge: mst: Support setting and reporting MST port states Make it possible to change the port state in a given MSTI by extending the bridge port netlink interface (RTM_SETLINK on PF_BRIDGE).The proposed iproute2 interface would be: bridge mst set dev msti state Current states in all applicable MSTIs can also be dumped via a corresponding RTM_GETLINK. The proposed iproute interface looks like this: $ bridge mst port msti vb1 0 state forwarding 100 state disabled vb2 0 state forwarding 100 state forwarding The preexisting per-VLAN states are still valid in the MST mode (although they are read-only), and can be queried as usual if one is interested in knowing a particular VLAN's state without having to care about the VID to MSTI mapping (in this example VLAN 20 and 30 are bound to MSTI 100): $ bridge -d vlan port vlan-id vb1 10 state forwarding mcast_router 1 20 state disabled mcast_router 1 30 state disabled mcast_router 1 40 state forwarding mcast_router 1 vb2 10 state forwarding mcast_router 1 20 state forwarding mcast_router 1 30 state forwarding mcast_router 1 40 state forwarding mcast_router 1 Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 16 ++++++++++++++++ include/uapi/linux/rtnetlink.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f60244b747ae..a86a7e7b811f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -122,6 +122,7 @@ enum { IFLA_BRIDGE_VLAN_TUNNEL_INFO, IFLA_BRIDGE_MRP, IFLA_BRIDGE_CFM, + IFLA_BRIDGE_MST, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -453,6 +454,21 @@ enum { #define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) +enum { + IFLA_BRIDGE_MST_UNSPEC, + IFLA_BRIDGE_MST_ENTRY, + __IFLA_BRIDGE_MST_MAX, +}; +#define IFLA_BRIDGE_MST_MAX (__IFLA_BRIDGE_MST_MAX - 1) + +enum { + IFLA_BRIDGE_MST_ENTRY_UNSPEC, + IFLA_BRIDGE_MST_ENTRY_MSTI, + IFLA_BRIDGE_MST_ENTRY_STATE, + __IFLA_BRIDGE_MST_ENTRY_MAX, +}; +#define IFLA_BRIDGE_MST_ENTRY_MAX (__IFLA_BRIDGE_MST_ENTRY_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 51530aade46e..83849a37db5b 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -817,6 +817,7 @@ enum { #define RTEXT_FILTER_MRP (1 << 4) #define RTEXT_FILTER_CFM_CONFIG (1 << 5) #define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) /* End of information exported to user level */ -- cgit v1.2.3 From 0dcac272540613d41c05e89679e4ddb978b612f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:09 +0100 Subject: bpf: Add multi kprobe link Adding new link type BPF_LINK_TYPE_KPROBE_MULTI that attaches kprobe program through fprobe API. The fprobe API allows to attach probe on multiple functions at once very fast, because it works on top of ftrace. On the other hand this limits the probe point to the function entry or return. The kprobe program gets the same pt_regs input ctx as when it's attached through the perf API. Adding new attach type BPF_TRACE_KPROBE_MULTI that allows attachment kprobe to multiple function with new link. User provides array of addresses or symbols with count to attach the kprobe program to. The new link_create uapi interface looks like: struct { __u32 flags; __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; } kprobe_multi; The flags field allows single BPF_TRACE_KPROBE_MULTI bit to create return multi kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220316122419.933957-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 99fab54ae9c0..d77f47af7752 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -997,6 +997,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1011,6 +1012,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, MAX_BPF_LINK_TYPE, }; @@ -1118,6 +1120,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1475,6 +1482,12 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + } kprobe_multi; }; } link_create; -- cgit v1.2.3 From ca74823c6e16dd42b7cf60d9fdde80e2a81a67bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:12 +0100 Subject: bpf: Add cookie support to programs attached with kprobe multi link Adding support to call bpf_get_attach_cookie helper from kprobe programs attached with kprobe multi link. The cookie is provided by array of u64 values, where each value is paired with provided function address or symbol with the same array index. When cookie array is provided it's sorted together with addresses (check bpf_kprobe_multi_cookie_swap). This way we can find cookie based on the address in bpf_get_attach_cookie helper. Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220316122419.933957-7-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d77f47af7752..7604e7d5438f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1487,6 +1487,7 @@ union bpf_attr { __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; + __aligned_u64 cookies; } kprobe_multi; }; } link_create; -- cgit v1.2.3 From 54f586a9153201c6cff55e1f561990c78bd99aa7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Mar 2022 21:27:51 +0100 Subject: rfkill: make new event layout opt-in Again new complaints surfaced that we had broken the ABI here, although previously all the userspace tools had agreed that it was their mistake and fixed it. Yet now there are cases (e.g. RHEL) that want to run old userspace with newer kernels, and thus are broken. Since this is a bit of a whack-a-mole thing, change the whole extensibility scheme of rfkill to no longer just rely on the message lengths, but instead require userspace to opt in via a new ioctl to a given maximum event size that it is willing to understand. By default, set that to RFKILL_EVENT_SIZE_V1 (8), so that the behaviour for userspace not calling the ioctl will look as if it's just running on an older kernel. Fixes: 14486c82612a ("rfkill: add a reason to the HW rfkill state") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220316212749.16491491b270.Ifcb1950998330a596f29a2a162e00b7546a1d6d0@changeid --- include/uapi/linux/rfkill.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 9b77cfc42efa..283c5a7b3f2c 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -159,8 +159,16 @@ struct rfkill_event_ext { * old behaviour for all userspace, unless it explicitly opts in to the * rules outlined here by using the new &struct rfkill_event_ext. * - * Userspace using &struct rfkill_event_ext must adhere to the following - * rules + * Additionally, some other userspace (bluez, g-s-d) was reading with a + * large size but as streaming reads rather than message-based, or with + * too strict checks for the returned size. So eventually, we completely + * reverted this, and extended messages need to be opted in to by using + * an ioctl: + * + * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext)); + * + * Userspace using &struct rfkill_event_ext and the ioctl must adhere to + * the following rules: * * 1. accept short writes, optionally using them to detect that it's * running on an older kernel; @@ -175,6 +183,8 @@ struct rfkill_event_ext { #define RFKILL_IOC_MAGIC 'R' #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) +#define RFKILL_IOC_MAX_SIZE 2 +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32) /* and that's all userspace gets */ -- cgit v1.2.3 From 73799a889262b4675799bec20a2765be6d6a3f98 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 15 Mar 2022 15:38:54 -0400 Subject: counter: add new COUNTER_EVENT_CHANGE_OF_STATE Add new counter event to notify user space about every new counter pulse. Link: https://lore.kernel.org/r/20220203135727.2374052-2-o.rempel@pengutronix.de Signed-off-by: Oleksij Rempel Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/486a5de67414470449efb84d06a2f2214f4bb31d.1647373009.git.vilhelm.gray@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index d0aa95aeff7b..96c5ffd368ad 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -61,6 +61,8 @@ enum counter_event_type { COUNTER_EVENT_THRESHOLD, /* Index signal detected */ COUNTER_EVENT_INDEX, + /* State of counter is changed */ + COUNTER_EVENT_CHANGE_OF_STATE, }; /** -- cgit v1.2.3 From 5c1b97c7d7b736e6439af4f43a65837bc72f56c1 Mon Sep 17 00:00:00 2001 From: Jeya R Date: Mon, 14 Feb 2022 16:09:52 +0000 Subject: misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP Add support for IOCTL requests to map and unmap on DSP based on map flags. Signed-off-by: Jeya R Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 0a89f95463f6..d248eeb20e67 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -13,6 +13,37 @@ #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) +#define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) +#define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) + +/** + * enum fastrpc_map_flags - control flags for mapping memory on DSP user process + * @FASTRPC_MAP_STATIC: Map memory pages with RW- permission and CACHE WRITEBACK. + * The driver is responsible for cache maintenance when passed + * the buffer to FastRPC calls. Same virtual address will be + * assigned for subsequent FastRPC calls. + * @FASTRPC_MAP_RESERVED: Reserved + * @FASTRPC_MAP_FD: Map memory pages with RW- permission and CACHE WRITEBACK. + * Mapping tagged with a file descriptor. User is responsible for + * CPU and DSP cache maintenance for the buffer. Get virtual address + * of buffer on DSP using HAP_mmap_get() and HAP_mmap_put() APIs. + * @FASTRPC_MAP_FD_DELAYED: Mapping delayed until user call HAP_mmap() and HAP_munmap() + * functions on DSP. It is useful to map a buffer with cache modes + * other than default modes. User is responsible for CPU and DSP + * cache maintenance for the buffer. + * @FASTRPC_MAP_FD_NOMAP: This flag is used to skip CPU mapping, + * otherwise behaves similar to FASTRPC_MAP_FD_DELAYED flag. + * @FASTRPC_MAP_MAX: max count for flags + * + */ +enum fastrpc_map_flags { + FASTRPC_MAP_STATIC = 0, + FASTRPC_MAP_RESERVED, + FASTRPC_MAP_FD = 2, + FASTRPC_MAP_FD_DELAYED, + FASTRPC_MAP_FD_NOMAP = 16, + FASTRPC_MAP_MAX, +}; struct fastrpc_invoke_args { __u64 ptr; @@ -49,9 +80,29 @@ struct fastrpc_req_mmap { __u64 vaddrout; /* dsp virtual address */ }; +struct fastrpc_mem_map { + __s32 version; + __s32 fd; /* fd */ + __s32 offset; /* buffer offset */ + __u32 flags; /* flags defined in enum fastrpc_map_flags */ + __u64 vaddrin; /* buffer virtual address */ + __u64 length; /* buffer length */ + __u64 vaddrout; /* [out] remote virtual address */ + __s32 attrs; /* buffer attributes used for SMMU mapping */ + __s32 reserved[4]; +}; + struct fastrpc_req_munmap { __u64 vaddrout; /* address to unmap */ __u64 size; /* size */ }; +struct fastrpc_mem_unmap { + __s32 vesion; + __s32 fd; /* fd */ + __u64 vaddr; /* remote process (dsp) virtual address */ + __u64 length; /* buffer size */ + __s32 reserved[5]; +}; + #endif /* __QCOM_FASTRPC_H__ */ -- cgit v1.2.3 From 6c16fd8bdd4058d4a6aaca9d5a7b40e4cb281d5a Mon Sep 17 00:00:00 2001 From: Jeya R Date: Mon, 14 Feb 2022 16:09:53 +0000 Subject: misc: fastrpc: Add support to get DSP capabilities Add support to get DSP capabilities. The capability information is cached on driver. Signed-off-by: Jeya R Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index d248eeb20e67..7cc9d342078a 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -15,6 +15,7 @@ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) +#define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct fastrpc_ioctl_capability) /** * enum fastrpc_map_flags - control flags for mapping memory on DSP user process @@ -105,4 +106,11 @@ struct fastrpc_mem_unmap { __s32 reserved[5]; }; +struct fastrpc_ioctl_capability { + __u32 domain; + __u32 attribute_id; + __u32 capability; /* dsp capability */ + __u32 reserved[4]; +}; + #endif /* __QCOM_FASTRPC_H__ */ -- cgit v1.2.3 From 7f1f481263c3ce5387d4fd5ad63ddaa8a295aab2 Mon Sep 17 00:00:00 2001 From: Jeya R Date: Mon, 14 Feb 2022 16:09:56 +0000 Subject: misc: fastrpc: check before loading process to the DSP Reject session if DSP domain is secure, device node is non-secure and signed PD is requested. Secure device node can access DSP without any restriction. Unsigned PD offload is only allowed for the DSP domain that can support unsigned offloading. Signed-off-by: Jeya R Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 7cc9d342078a..f39edac20305 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -46,6 +46,23 @@ enum fastrpc_map_flags { FASTRPC_MAP_MAX, }; +enum fastrpc_proc_attr { + /* Macro for Debug attr */ + FASTRPC_MODE_DEBUG = (1 << 0), + /* Macro for Ptrace */ + FASTRPC_MODE_PTRACE = (1 << 1), + /* Macro for CRC Check */ + FASTRPC_MODE_CRC = (1 << 2), + /* Macro for Unsigned PD */ + FASTRPC_MODE_UNSIGNED_MODULE = (1 << 3), + /* Macro for Adaptive QoS */ + FASTRPC_MODE_ADAPTIVE_QOS = (1 << 4), + /* Macro for System Process */ + FASTRPC_MODE_SYSTEM_PROCESS = (1 << 5), + /* Macro for Prvileged Process */ + FASTRPC_MODE_PRIVILEGED = (1 << 6), +}; + struct fastrpc_invoke_args { __u64 ptr; __u64 length; -- cgit v1.2.3 From e90d911906196bf987492c94e38f10ca611dfd7b Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Gattupalli Date: Mon, 14 Feb 2022 16:09:58 +0000 Subject: misc: fastrpc: Add support to secure memory map This patch adds support to secure memory allocations for DSP. It repurposes the reserved field in struct fastrpc_invoke_args to add attributes to invoke request, for example to setup a secure memory map for dsp. Secure memory is assigned to DSP Virtual Machine IDs using Qualcomm SCM calls. Signed-off-by: Vamsi Krishna Gattupalli Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index f39edac20305..5e29f2cfa42d 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -63,11 +63,14 @@ enum fastrpc_proc_attr { FASTRPC_MODE_PRIVILEGED = (1 << 6), }; +/* Fastrpc attribute for memory protection of buffers */ +#define FASTRPC_ATTR_SECUREMAP (1) + struct fastrpc_invoke_args { __u64 ptr; __u64 length; __s32 fd; - __u32 reserved; + __u32 attr; }; struct fastrpc_invoke { -- cgit v1.2.3 From 336d4b814bf078fa698488632c19beca47308896 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:15:32 -0600 Subject: ptrace: Move setting/clearing ptrace_message into ptrace_stop Today ptrace_message is easy to overlook as it not a core part of ptrace_stop. It has been overlooked so much that there are places that set ptrace_message and don't clear it, and places that never set it. So if you get an unlucky sequence of events the ptracer may be able to read a ptrace_message that does not apply to the current ptrace stop. Move setting of ptrace_message into ptrace_stop so that it always gets set before the stop, and always gets cleared after the stop. This prevents non-sense from being reported to userspace and makes ptrace_message more visible in the ptrace helper functions so that kernel developers can see it. Link: https://lkml.kernel.org/r/87bky67qfv.fsf_-_@email.froward.int.ebiederm.org Acked-by: Oleg Nesterov Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index b7af92e07d1f..195ae64a8c87 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by ptrace_report_syscall_* to describe the current syscall-stop. + * by ptrace_stop to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 -- cgit v1.2.3 From ee2a098851bfbe8bcdd964c0121f4246f00ff41e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Mar 2022 11:20:41 -0700 Subject: bpf: Adjust BPF stack helper functions to accommodate skip > 0 Let's say that the caller has storage for num_elem stack frames. Then, the BPF stack helper functions walk the stack for only num_elem frames. This means that if skip > 0, one keeps only 'num_elem - skip' frames. This is because it sets init_nr in the perf_callchain_entry to the end of the buffer to save num_elem entries only. I believe it was because the perf callchain code unwound the stack frames until it reached the global max size (sysctl_perf_event_max_stack). However it now has perf_callchain_entry_ctx.max_stack to limit the iteration locally. This simplifies the code to handle init_nr in the BPF callstack entries and removes the confusion with the perf_event's __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0. Also change the comment on bpf_get_stack() in the header file to be more explicit what the return value means. Fixes: c195651e565a ("bpf: add bpf_get_stack helper") Signed-off-by: Namhyung Kim Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com Link: https://lore.kernel.org/bpf/20220314182042.71025-1-namhyung@kernel.org Based-on-patch-by: Eugene Loh --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7604e7d5438f..d14b10b85e51 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3009,8 +3009,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -4316,8 +4316,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description -- cgit v1.2.3 From 6d8491910fcd3324d0f0ece3bd68e85ead3a04d7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 1 Mar 2022 06:03:47 +0000 Subject: KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2 KVM_CAP_DISABLE_QUIRKS is irrevocably broken. The capability does not advertise the set of quirks which may be disabled to userspace, so it is impossible to predict the behavior of KVM. Worse yet, KVM_CAP_DISABLE_QUIRKS will tolerate any value for cap->args[0], meaning it fails to reject attempts to set invalid quirk bits. The only valid workaround for the quirky quirks API is to add a new CAP. Actually advertise the set of quirks that can be disabled to userspace so it can predict KVM's behavior. Reject values for cap->args[0] that contain invalid bits. Finally, add documentation for the new capability and describe the existing quirks. Signed-off-by: Oliver Upton Message-Id: <20220301060351.442881-5-oupton@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d2f1efc3aa35..91a6fe4e02c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1143,6 +1143,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 +#define KVM_CAP_DISABLE_QUIRKS2 213 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 824ddc601adc2cc48efb7f58b57997986c1c1276 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 22 Mar 2022 14:45:32 -0700 Subject: userfaultfd: provide unmasked address on page-fault Userfaultfd is supposed to provide the full address (i.e., unmasked) of the faulting access back to userspace. However, that is not the case for quite some time. Even running "userfaultfd_demo" from the userfaultfd man page provides the wrong output (and contradicts the man page). Notice that "UFFD_EVENT_PAGEFAULT event" shows the masked address (7fc5e30b3000) and not the first read address (0x7fc5e30b300f). Address returned by mmap() = 0x7fc5e30b3000 fault_handler_thread(): poll() returns: nready = 1; POLLIN = 1; POLLERR = 0 UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fc5e30b3000 (uffdio_copy.copy returned 4096) Read address 0x7fc5e30b300f in main(): A Read address 0x7fc5e30b340f in main(): A Read address 0x7fc5e30b380f in main(): A Read address 0x7fc5e30b3c0f in main(): A The exact address is useful for various reasons and specifically for prefetching decisions. If it is known that the memory is populated by certain objects whose size is not page-aligned, then based on the faulting address, the uffd-monitor can decide whether to prefetch and prefault the adjacent page. This bug has been for quite some time in the kernel: since commit 1a29d85eb0f1 ("mm: use vmf->address instead of of vmf->virtual_address") vmf->virtual_address"), which dates back to 2016. A concern has been raised that existing userspace application might rely on the old/wrong behavior in which the address is masked. Therefore, it was suggested to provide the masked address unless the user explicitly asks for the exact address. Add a new userfaultfd feature UFFD_FEATURE_EXACT_ADDRESS to direct userfaultfd to provide the exact address. Add a new "real_address" field to vmf to hold the unmasked address. Provide the address to userspace accordingly. Initialize real_address in various code-paths to be consistent with address, even when it is not used, to be on the safe side. [namit@vmware.com: initialize real_address on all code paths, per Jan] Link: https://lkml.kernel.org/r/20220226022655.350562-1-namit@vmware.com [akpm@linux-foundation.org: fix typo in comment, per Jan] Link: https://lkml.kernel.org/r/20220218041003.3508-1-namit@vmware.com Signed-off-by: Nadav Amit Acked-by: Peter Xu Reviewed-by: David Hildenbrand Acked-by: Mike Rapoport Reviewed-by: Jan Kara Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 05b31d60acf6..ef739054cb1c 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -32,7 +32,8 @@ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ UFFD_FEATURE_MINOR_HUGETLBFS | \ - UFFD_FEATURE_MINOR_SHMEM) + UFFD_FEATURE_MINOR_SHMEM | \ + UFFD_FEATURE_EXACT_ADDRESS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -189,6 +190,10 @@ struct uffdio_api { * * UFFD_FEATURE_MINOR_SHMEM indicates the same support as * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. + * + * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page + * faults would be provided and the offset within the page would not be + * masked. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -201,6 +206,7 @@ struct uffdio_api { #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) #define UFFD_FEATURE_MINOR_SHMEM (1<<10) +#define UFFD_FEATURE_EXACT_ADDRESS (1<<11) __u64 features; __u64 ioctls; -- cgit v1.2.3 From e99653afeb9585350644c5ae4b0ca987cbe8d053 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 9 Mar 2022 17:22:53 +0100 Subject: rtc: add new RTC_FEATURE_ALARM_WAKEUP_ONLY feature Some RTCs have an IRQ pin that is not connected to a CPU interrupt but rather directly to a PMIC or power supply. In that case, it is still useful to be able to set alarms but we shouldn't expect interrupts. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220309162301.61679-22-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 03e5b776e597..97aca4503a6a 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -133,7 +133,8 @@ struct rtc_param { #define RTC_FEATURE_UPDATE_INTERRUPT 4 #define RTC_FEATURE_CORRECTION 5 #define RTC_FEATURE_BACKUP_SWITCH_MODE 6 -#define RTC_FEATURE_CNT 7 +#define RTC_FEATURE_ALARM_WAKEUP_ONLY 7 +#define RTC_FEATURE_CNT 8 /* parameter list */ #define RTC_PARAM_FEATURES 0 -- cgit v1.2.3 From c724c866bb70cb8c607081a26823a1f0ebde4387 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:29 -0700 Subject: linux/types.h: remove unnecessary __bitwise__ There are no users of "__bitwise__" except the definition of "__bitwise". Remove __bitwise__ and define __bitwise directly. This is a follow-up to 05de97003c77 ("linux/types.h: enable endian checks for all sparse builds"). [akpm@linux-foundation.org: change the tools/include/linux/types.h definition also] Link: https://lkml.kernel.org/r/20220310220927.245704-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index f6d2f83cbe29..71696f424ac8 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -20,11 +20,10 @@ */ #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; -- cgit v1.2.3 From 179fd6ba3bacbf7b19cbdf9b14be109d54318394 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:32 -0700 Subject: Documentation/sparse: add hints about __CHECKER__ Several attributes depend on __CHECKER__, but previously there was no clue in the tree about when __CHECKER__ might be defined. Add hints at the most common places (__kernel, __user, __iomem, __bitwise) and in the sparse documentation. Link: https://lkml.kernel.org/r/20220310220927.245704-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Michael S . Tsirkin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 71696f424ac8..c4dc597f3dcf 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -19,6 +19,7 @@ * any application/library that wants linux/types.h. */ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else -- cgit v1.2.3 From 7ef66d186eb95f987a97fb3329b65c840e2dc9bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Mar 2022 06:53:18 -0600 Subject: io_uring: remove IORING_CQE_F_MSG This was introduced with the message ring opcode, but isn't strictly required for the request itself. The sender can encode what is needed in user_data, which is passed to the receiver. It's unclear if having a separate flag that essentially says "This CQE did not originate from an SQE on this ring" provides any real utility to applications. While we can always re-introduce a flag to provide this information, we cannot take it away at a later point in time. Remove the flag while we still can, before it's in a released kernel. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d2be4eb22008..784adc6f6ed2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -201,11 +201,9 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries - * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) -#define IORING_CQE_F_MSG (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From 9457056ac426e5ed0671356509c8dcce69f8dee0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 24 Mar 2022 18:14:12 -0700 Subject: mm: madvise: MADV_DONTNEED_LOCKED MADV_DONTNEED historically rejects mlocked ranges, but with MLOCK_ONFAULT and MCL_ONFAULT allowing to mlock without populating, there are valid use cases for depopulating locked ranges as well. Users mlock memory to protect secrets. There are allocators for secure buffers that want in-use memory generally mlocked, but cleared and invalidated memory to give up the physical pages. This could be done with explicit munlock -> mlock calls on free -> alloc of course, but that adds two unnecessary syscalls, heavy mmap_sem write locks, vma splits and re-merges - only to get rid of the backing pages. Users also mlockall(MCL_ONFAULT) to suppress sustained paging, but are okay with on-demand initial population. It seems valid to selectively free some memory during the lifetime of such a process, without having to mess with its overall policy. Why add a separate flag? Isn't this a pretty niche usecase? - MADV_DONTNEED has been bailing on locked vmas forever. It's at least conceivable that someone, somewhere is relying on mlock to protect data from perhaps broader invalidation calls. Changing this behavior now could lead to quiet data corruption. - It also clarifies expectations around MADV_FREE and maybe MADV_REMOVE. It avoids the situation where one quietly behaves different than the others. MADV_FREE_LOCKED can be added later. - The combination of mlock() and madvise() in the first place is probably niche. But where it happens, I'd say that dropping pages from a locked region once they don't contain secrets or won't page anymore is much saner than relying on mlock to protect memory from speculative or errant invalidation calls. It's just that we can't change the default behavior because of the two previous points. Given that, an explicit new flag seems to make the most sense. [hannes@cmpxchg.org: fix mips build] Link: https://lkml.kernel.org/r/20220304171912.305060-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Mike Kravetz Reviewed-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Nadav Amit Cc: David Hildenbrand Cc: Dr. David Alan Gilbert Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/mman-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 1567a3294c3d..6c1aa92a92e4 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -75,6 +75,8 @@ #define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ #define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ +#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit v1.2.3 From 90a6951b58e935124eeb7ecd9fbc2426f841ac0c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Tue, 15 Feb 2022 11:04:29 +0530 Subject: Add definition of VIRTIO_F_IN_ORDER feature bit This patch adds the definition of VIRTIO_F_IN_ORDER feature bit in the relevant header file to make it available in QEMU's linux standard header file virtio_config.h, which is updated using scripts/update-linux-headers.sh Signed-off-by: Gautam Dawar Link: https://lore.kernel.org/r/20220215053430.24650-1-gdawar@xilinx.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index b5eda06f0d57..f0fb0ae021c0 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -82,6 +82,12 @@ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 +/* + * Inorder feature indicates that all buffers are used by the device + * in the same order in which they have been made available. + */ +#define VIRTIO_F_IN_ORDER 35 + /* * This feature indicates that memory accesses by the driver and the * device are ordered in a way described by the platform. -- cgit v1.2.3 From 13d640a3e9a3ac7ec694843d3d3b785e85fb8cb8 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:14 +0800 Subject: virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC Base on the lastest virtio crypto spec, define VIRTIO_CRYPTO_NOSPC. Reviewed-by: Gonglei Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-2-pizhenwei@bytedance.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index a03932f10565..1166a49084b0 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -408,6 +408,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ +#define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) -- cgit v1.2.3 From 24e19590628b58578748eeaec8140bf9c9dc00d9 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:15 +0800 Subject: virtio-crypto: introduce akcipher service Introduce asymmetric service definition, asymmetric operations and several well known algorithms. Co-developed-by: lei he Signed-off-by: lei he Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-3-pizhenwei@bytedance.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Gonglei --- include/uapi/linux/virtio_crypto.h | 81 +++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 1166a49084b0..71a54a6849ca 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -37,6 +37,7 @@ #define VIRTIO_CRYPTO_SERVICE_HASH 1 #define VIRTIO_CRYPTO_SERVICE_MAC 2 #define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) @@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) +#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04) +#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05) __le32 opcode; __le32 algo; __le32 flag; @@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req { __u8 padding[32]; }; +struct virtio_crypto_rsa_session_para { +#define VIRTIO_CRYPTO_RSA_RAW_PADDING 0 +#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1 + __le32 padding_algo; + +#define VIRTIO_CRYPTO_RSA_NO_HASH 0 +#define VIRTIO_CRYPTO_RSA_MD2 1 +#define VIRTIO_CRYPTO_RSA_MD3 2 +#define VIRTIO_CRYPTO_RSA_MD4 3 +#define VIRTIO_CRYPTO_RSA_MD5 4 +#define VIRTIO_CRYPTO_RSA_SHA1 5 +#define VIRTIO_CRYPTO_RSA_SHA256 6 +#define VIRTIO_CRYPTO_RSA_SHA384 7 +#define VIRTIO_CRYPTO_RSA_SHA512 8 +#define VIRTIO_CRYPTO_RSA_SHA224 9 + __le32 hash_algo; +}; + +struct virtio_crypto_ecdsa_session_para { +#define VIRTIO_CRYPTO_CURVE_UNKNOWN 0 +#define VIRTIO_CRYPTO_CURVE_NIST_P192 1 +#define VIRTIO_CRYPTO_CURVE_NIST_P224 2 +#define VIRTIO_CRYPTO_CURVE_NIST_P256 3 +#define VIRTIO_CRYPTO_CURVE_NIST_P384 4 +#define VIRTIO_CRYPTO_CURVE_NIST_P521 5 + __le32 curve_id; + __le32 padding; +}; + +struct virtio_crypto_akcipher_session_para { +#define VIRTIO_CRYPTO_NO_AKCIPHER 0 +#define VIRTIO_CRYPTO_AKCIPHER_RSA 1 +#define VIRTIO_CRYPTO_AKCIPHER_DSA 2 +#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3 + __le32 algo; + +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC 1 +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2 + __le32 keytype; + __le32 keylen; + + union { + struct virtio_crypto_rsa_session_para rsa; + struct virtio_crypto_ecdsa_session_para ecdsa; + } u; +}; + +struct virtio_crypto_akcipher_create_session_req { + struct virtio_crypto_akcipher_session_para para; + __u8 padding[36]; +}; + struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 @@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req { mac_create_session; struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_akcipher_create_session_req + akcipher_create_session; struct virtio_crypto_destroy_session_req destroy_session; __u8 padding[56]; @@ -266,6 +325,14 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) +#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_SIGN \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) +#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03) __le32 opcode; /* algo should be service-specific algorithms */ __le32 algo; @@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req { __u8 padding[32]; }; +struct virtio_crypto_akcipher_para { + __le32 src_data_len; + __le32 dst_data_len; +}; + +struct virtio_crypto_akcipher_data_req { + struct virtio_crypto_akcipher_para para; + __u8 padding[40]; +}; + /* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { struct virtio_crypto_op_header header; @@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_hash_data_req hash_req; struct virtio_crypto_mac_data_req mac_req; struct virtio_crypto_aead_data_req aead_req; + struct virtio_crypto_akcipher_data_req akcipher_req; __u8 padding[48]; } u; }; @@ -409,6 +487,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ #define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ +#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) @@ -439,7 +518,7 @@ struct virtio_crypto_config { __le32 max_cipher_key_len; /* Maximum length of authenticated key */ __le32 max_auth_key_len; - __le32 reserve; + __le32 akcipher_algo; /* Maximum size of each crypto request's content */ __le64 max_size; }; -- cgit v1.2.3 From a61280ddddaa45f95b60dd54c05f8e0e5b6810b7 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:51 +0800 Subject: vdpa: support exposing the config size to userspace - GET_CONFIG_SIZE: return the size of the virtio config space. The size contains the fields which are conditional on feature bits. Acked-by: Jason Wang Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-2-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- include/uapi/linux/vhost.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index c998860d7bbc..bc74e95a273a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -150,4 +150,8 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + #endif -- cgit v1.2.3 From b04d910af330b55e1d5d6eb9ecd53a375a9cf81c Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:53 +0800 Subject: vdpa: support exposing the count of vqs to userspace - GET_VQS_COUNT: the count of virtqueues that exposed Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-4-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Longpeng <longpeng2@huawei.com>
Reviewed-by: Stefano Garzarella --- include/uapi/linux/vhost.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bc74e95a273a..5d99e7c242a2 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -154,4 +154,7 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- cgit v1.2.3 From f941c51eeac7ebe0f8ec30943bf78e7f60aad039 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 29 Mar 2022 20:18:15 +0000 Subject: loop: fix ioctl calls using compat_loop_info Support for cryptoloop was deleted in commit 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer"), making the usage of loop_info->lo_encrypt_type obsolete. However, this member was also removed from the compat_loop_info definition and this breaks userspace ioctl calls for 32-bit binaries and CONFIG_COMPAT=y. This patch restores the compat_loop_info->lo_encrypt_type member and marks it obsolete as well as in the uapi header definitions. Fixes: 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer") Signed-off-by: Carlos Llamas Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220329201815.1347500-1-cmllamas@google.com Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 24a1c45bd1ae..98e60801195e 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -45,7 +45,7 @@ struct loop_info { unsigned long lo_inode; /* ioctl r/o */ __kernel_old_dev_t lo_rdevice; /* ioctl r/o */ int lo_offset; - int lo_encrypt_type; + int lo_encrypt_type; /* obsolete, ignored */ int lo_encrypt_key_size; /* ioctl w/o */ int lo_flags; char lo_name[LO_NAME_SIZE]; @@ -61,7 +61,7 @@ struct loop_info64 { __u64 lo_offset; __u64 lo_sizelimit;/* bytes, 0 == max available */ __u32 lo_number; /* ioctl r/o */ - __u32 lo_encrypt_type; + __u32 lo_encrypt_type; /* obsolete, ignored */ __u32 lo_encrypt_key_size; /* ioctl w/o */ __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; -- cgit v1.2.3 From 1cd927ad6f62f27d8908498dcbf61395c5dd5fe2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 1 Apr 2022 14:39:03 -0400 Subject: tracing: mark user_events as BROKEN After being merged, user_events become more visible to a wider audience that have concerns with the current API. It is too late to fix this for this release, but instead of a full revert, just mark it as BROKEN (which prevents it from being selected in make config). Then we can work finding a better API. If that fails, then it will need to be completely reverted. To not have the code silently bitrot, still allow building it with COMPILE_TEST. And to prevent the uapi header from being installed, then later changed, and then have an old distro user space see the old version, move the header file out of the uapi directory. Surround the include with CONFIG_COMPILE_TEST to the current location, but when the BROKEN tag is taken off, it will use the uapi directory, and fail to compile. This is a good way to remind us to move the header back. Link: https://lore.kernel.org/all/20220330155835.5e1f6669@gandalf.local.home Link: https://lkml.kernel.org/r/20220330201755.29319-1-mathieu.desnoyers@efficios.com Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) Signed-off-by: Linus Torvalds --- include/uapi/linux/user_events.h | 116 --------------------------------------- 1 file changed, 116 deletions(-) delete mode 100644 include/uapi/linux/user_events.h (limited to 'include/uapi') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h deleted file mode 100644 index e570840571e1..000000000000 --- a/include/uapi/linux/user_events.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (c) 2021, Microsoft Corporation. - * - * Authors: - * Beau Belgrave - */ -#ifndef _UAPI_LINUX_USER_EVENTS_H -#define _UAPI_LINUX_USER_EVENTS_H - -#include -#include - -#ifdef __KERNEL__ -#include -#else -#include -#endif - -#define USER_EVENTS_SYSTEM "user_events" -#define USER_EVENTS_PREFIX "u:" - -/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ -#define EVENT_BIT_FTRACE 0 -#define EVENT_BIT_PERF 1 -#define EVENT_BIT_OTHER 7 - -#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) -#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) -#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) - -/* Create dynamic location entry within a 32-bit value */ -#define DYN_LOC(offset, size) ((size) << 16 | (offset)) - -/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ -#define FLAG_BPF_ITER (1 << 0) - -/* - * Describes an event registration and stores the results of the registration. - * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum - * must set the size and name_args before invocation. - */ -struct user_reg { - - /* Input: Size of the user_reg structure being used */ - __u32 size; - - /* Input: Pointer to string with event name, description and flags */ - __u64 name_args; - - /* Output: Byte index of the event within the status page */ - __u32 status_index; - - /* Output: Index of the event to use when writing data */ - __u32 write_index; -}; - -#define DIAG_IOC_MAGIC '*' - -/* Requests to register a user_event */ -#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) - -/* Requests to delete a user_event */ -#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) - -/* Data type that was passed to the BPF program */ -enum { - /* Data resides in kernel space */ - USER_BPF_DATA_KERNEL, - - /* Data resides in user space */ - USER_BPF_DATA_USER, - - /* Data is a pointer to a user_bpf_iter structure */ - USER_BPF_DATA_ITER, -}; - -/* - * Describes an iovec iterator that BPF programs can use to access data for - * a given user_event write() / writev() call. - */ -struct user_bpf_iter { - - /* Offset of the data within the first iovec */ - __u32 iov_offset; - - /* Number of iovec structures */ - __u32 nr_segs; - - /* Pointer to iovec structures */ - const struct iovec *iov; -}; - -/* Context that BPF programs receive when attached to a user_event */ -struct user_bpf_context { - - /* Data type being passed (see union below) */ - __u32 data_type; - - /* Length of the data */ - __u32 data_len; - - /* Pointer to data, varies by data type */ - union { - /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ - void *kdata; - - /* User data (data_type == USER_BPF_DATA_USER) */ - void *udata; - - /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ - struct user_bpf_iter *iter; - }; -}; - -#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3