From db751e309ff05461a0c8e114b1238d7a69cc1f18 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:43 +0000 Subject: ELF: UAPI and Kconfig additions for ELF program properties Pull the basic ELF definitions relating to the NT_GNU_PROPERTY_TYPE_0 note from Yu-Cheng Yu's earlier x86 shstk series. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Signed-off-by: Yu-cheng Yu Reviewed-by: Catalin Marinas Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 34c02e4290fe..c37731407074 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -36,6 +36,7 @@ typedef __s64 Elf64_Sxword; #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_PROPERTY 0x6474e553 #define PT_GNU_STACK (PT_LOOS + 0x474e551) -- cgit v1.2.3 From 00e19ceec80b03a43f626f891fcc53e57919f1b3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:44 +0000 Subject: ELF: Add ELF program property parsing support ELF program properties will be needed for detecting whether to enable optional architecture or ABI features for a new ELF process. For now, there are no generic properties that we care about, so do nothing unless CONFIG_ARCH_USE_GNU_PROPERTY=y. Otherwise, the presence of properties using the PT_PROGRAM_PROPERTY phdrs entry (if any), and notify each property to the arch code. For now, the added code is not used. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c37731407074..20900f4496b7 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -368,6 +368,7 @@ typedef struct elf64_shdr { * Notes used in ET_CORE. Architectures export some of the arch register sets * using the corresponding note types via the PTRACE_GETREGSET and * PTRACE_SETREGSET requests. + * The note name for all these is "LINUX". */ #define NT_PRSTATUS 1 #define NT_PRFPREG 2 @@ -430,6 +431,9 @@ typedef struct elf64_shdr { #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + /* Note header in a PT_NOTE section */ typedef struct elf32_note { Elf32_Word n_namesz; /* Name size */ -- cgit v1.2.3 From ab7876a98a2160092133de4c648e94b18bc3f139 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:47 +0000 Subject: arm64: elf: Enable BTI at exec based on ELF program properties For BTI protection to be as comprehensive as possible, it is desirable to have BTI enabled from process startup. If this is not done, the process must use mprotect() to enable BTI for each of its executable mappings, but this is painful to do in the libc startup code. It's simpler and more sound to have the kernel do it instead. To this end, detect BTI support in the executable (or ELF interpreter, as appropriate), via the NT_GNU_PROGRAM_PROPERTY_TYPE_0 note, and tweak the initial prot flags for the process' executable pages to include PROT_BTI as appropriate. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 20900f4496b7..c6dd0215482e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -448,4 +448,10 @@ typedef struct elf64_note { Elf64_Word n_type; /* Content type */ } Elf64_Nhdr; +/* .note.gnu.property types for EM_AARCH64: */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + #endif /* _UAPI_LINUX_ELF_H */ -- cgit v1.2.3 From 980737282232b752bb14dab96d77665c15889c36 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Thu, 2 Apr 2020 11:45:31 +0300 Subject: capabilities: Introduce CAP_PERFMON to kernel and user space Introduce the CAP_PERFMON capability designed to secure system performance monitoring and observability operations so that CAP_PERFMON can assist CAP_SYS_ADMIN capability in its governing role for performance monitoring and observability subsystems. CAP_PERFMON hardens system security and integrity during performance monitoring and observability operations by decreasing attack surface that is available to a CAP_SYS_ADMIN privileged process [2]. Providing the access to system performance monitoring and observability operations under CAP_PERFMON capability singly, without the rest of CAP_SYS_ADMIN credentials, excludes chances to misuse the credentials and makes the operation more secure. Thus, CAP_PERFMON implements the principle of least privilege for performance monitoring and observability operations (POSIX IEEE 1003.1e: 2.2.2.39 principle of least privilege: A security design principle that states that a process or program be granted only those privileges (e.g., capabilities) necessary to accomplish its legitimate function, and only for the time that such privileges are actually required) CAP_PERFMON meets the demand to secure system performance monitoring and observability operations for adoption in security sensitive, restricted, multiuser production environments (e.g. HPC clusters, cloud and virtual compute environments), where root or CAP_SYS_ADMIN credentials are not available to mass users of a system, and securely unblocks applicability and scalability of system performance monitoring and observability operations beyond root and CAP_SYS_ADMIN use cases. CAP_PERFMON takes over CAP_SYS_ADMIN credentials related to system performance monitoring and observability operations and balances amount of CAP_SYS_ADMIN credentials following the recommendations in the capabilities man page [1] for CAP_SYS_ADMIN: "Note: this capability is overloaded; see Notes to kernel developers, below." For backward compatibility reasons access to system performance monitoring and observability subsystems of the kernel remains open for CAP_SYS_ADMIN privileged processes but CAP_SYS_ADMIN capability usage for secure system performance monitoring and observability operations is discouraged with respect to the designed CAP_PERFMON capability. Although the software running under CAP_PERFMON can not ensure avoidance of related hardware issues, the software can still mitigate these issues following the official hardware issues mitigation procedure [2]. The bugs in the software itself can be fixed following the standard kernel development process [3] to maintain and harden security of system performance monitoring and observability operations. [1] http://man7.org/linux/man-pages/man7/capabilities.7.html [2] https://www.kernel.org/doc/html/latest/process/embargoed-hardware-issues.html [3] https://www.kernel.org/doc/html/latest/admin-guide/security-bugs.html Signed-off-by: Alexey Budankov Acked-by: James Morris Acked-by: Serge E. Hallyn Acked-by: Song Liu Acked-by: Stephen Smalley Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Igor Lubashev Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Cc: linux-doc@vger.kernel.org Cc: linux-man@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: selinux@vger.kernel.org Link: http://lore.kernel.org/lkml/5590d543-82c6-490a-6544-08e6a5517db0@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/capability.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 272dc69fa080..e58c9636741b 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -367,8 +367,14 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_READ 37 +/* + * Allow system performance and observability privileged operations + * using perf_events, i915_perf and other kernel subsystems + */ + +#define CAP_PERFMON 38 -#define CAP_LAST_CAP CAP_AUDIT_READ +#define CAP_LAST_CAP CAP_PERFMON #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) -- cgit v1.2.3 From 104edb94cc4b3101bab33161cd861de13e85610b Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 27 Mar 2020 10:59:48 +0530 Subject: tee: add private login method for kernel clients There are use-cases where user-space shouldn't be allowed to communicate directly with a TEE device which is dedicated to provide a specific service for a kernel client. So add a private login method for kernel clients and disallow user-space to open-session using GP implementation defined login method range: (0x80000000 - 0xBFFFFFFF). Reviewed-by: Jerome Forissier Signed-off-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 6596f3a09e54..b619f37ee03e 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -173,6 +173,15 @@ struct tee_ioctl_buf_data { #define TEE_IOCTL_LOGIN_APPLICATION 4 #define TEE_IOCTL_LOGIN_USER_APPLICATION 5 #define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6 +/* + * Disallow user-space to use GP implementation specific login + * method range (0x80000000 - 0xBFFFFFFF). This range is rather + * being reserved for REE kernel clients or TEE implementation. + */ +#define TEE_IOCTL_LOGIN_REE_KERNEL_MIN 0x80000000 +#define TEE_IOCTL_LOGIN_REE_KERNEL_MAX 0xBFFFFFFF +/* Private login method for REE kernel clients */ +#define TEE_IOCTL_LOGIN_REE_KERNEL 0x80000000 /** * struct tee_ioctl_param - parameter -- cgit v1.2.3 From eec517cdb4810b3843eb7707971de3164088bff1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 20 Apr 2020 00:11:50 +0200 Subject: net: Add IF_OPER_TESTING RFC 2863 defines the operational state testing. Add support for this state, both as a IF_LINK_MODE_ and __LINK_STATE_. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index be714cd8c826..797ba2c1562a 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -178,6 +178,7 @@ enum { enum { IF_LINK_MODE_DEFAULT, IF_LINK_MODE_DORMANT, /* limit upward transition to dormant */ + IF_LINK_MODE_TESTING, /* limit upward transition to testing */ }; /* -- cgit v1.2.3 From 72ef5e52b3f74c0be47b20f5c434b7ecc830cf40 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:35 +0200 Subject: docs: fix broken references to text files Several references got broken due to txt to ReST conversion. Several of them can be automatically fixed with: scripts/documentation-file-ref-check --fix Reviewed-by: Mathieu Poirier # hwtracing/coresight/Kconfig Reviewed-by: Paul E. McKenney # memory-barrier.txt Acked-by: Alex Shi # translations/zh_CN Acked-by: Federico Vaga # translations/it_IT Acked-by: Marc Zyngier # kvm/arm64 Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6f919ddb83a33b5f2a63b6b5f0575737bb2b36aa.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/uapi/linux/ethtool_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7fde76366ba4..1711e57f7848 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -2,7 +2,7 @@ /* * include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool * - * See Documentation/networking/ethtool-netlink.txt in kernel source tree for + * See Documentation/networking/ethtool-netlink.rst in kernel source tree for * doucumentation of the interface. */ -- cgit v1.2.3 From 3ecad8c2c1ff333e204c26e2f0dddfa623153f87 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:36 +0200 Subject: docs: fix broken references for ReST files that moved around Some broken references happened due to shifting files around and ReST renames. Those can't be auto-fixed by the script, so let's fix them manually. Signed-off-by: Mauro Carvalho Chehab Acked-by: Corentin Labbe Link: https://lore.kernel.org/r/64773a12b4410aaf3e3be89e3ec7e34de2484eea.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/uapi/linux/kvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 428c7dde6b4b..fdd632c833b4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virt/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.rst */ union { __u32 irq; @@ -1107,7 +1107,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virt/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.rst. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) -- cgit v1.2.3 From af690f459393017ce40bb9beef6a00e79511574d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:56 +0200 Subject: firewire: firewire-cdev.hL get rid of a docs warning This warning: ./include/uapi/linux/firewire-cdev.h:312: WARNING: Inline literal start-string without end-string. is because %FOO doesn't work if there's a parenthesis at the string (as a parenthesis may indicate a function). So, mark the literal block using the alternate ``FOO`` syntax. Acked-by: Stefan Richter Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9b2501a41eba27ccdd4603cac2353c0efba7a90a.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/uapi/linux/firewire-cdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index 1acd2b179aef..7e5b5c10a49c 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -308,7 +308,7 @@ struct fw_cdev_event_iso_interrupt_mc { /** * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed * @closure: See &fw_cdev_event_common; - * set by %FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE) ioctl + * set by``FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE)`` ioctl * @type: %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or * %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED * @handle: Reference by which an allocated resource can be deallocated -- cgit v1.2.3 From 1ca3cb46a992d83ebd093acc64241007b20c5033 Mon Sep 17 00:00:00 2001 From: Maheshwar Ajja Date: Mon, 16 Mar 2020 23:42:30 +0100 Subject: media: v4l2-ctrl: Add H264 profile and levels Add H264 profile "Contrained High" and H264 levels "5.2", "6.0", "6.1" and "6.2". Signed-off-by: Maheshwar Ajja Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 1a58d7cc4ccc..0ba1005c9651 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -473,6 +473,10 @@ enum v4l2_mpeg_video_h264_level { V4L2_MPEG_VIDEO_H264_LEVEL_4_2 = 13, V4L2_MPEG_VIDEO_H264_LEVEL_5_0 = 14, V4L2_MPEG_VIDEO_H264_LEVEL_5_1 = 15, + V4L2_MPEG_VIDEO_H264_LEVEL_5_2 = 16, + V4L2_MPEG_VIDEO_H264_LEVEL_6_0 = 17, + V4L2_MPEG_VIDEO_H264_LEVEL_6_1 = 18, + V4L2_MPEG_VIDEO_H264_LEVEL_6_2 = 19, }; #define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA (V4L2_CID_MPEG_BASE+360) #define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA (V4L2_CID_MPEG_BASE+361) @@ -501,6 +505,7 @@ enum v4l2_mpeg_video_h264_profile { V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH = 15, V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, + V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, }; #define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT (V4L2_CID_MPEG_BASE+364) #define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH (V4L2_CID_MPEG_BASE+365) -- cgit v1.2.3 From 4d797fce783a8eb11dd23463828db84743795046 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 1 Apr 2020 17:25:47 +0300 Subject: cfg80211: Unprotected Beacon frame RX indication Extend cfg80211_rx_unprot_mlme_mgmt() to cover indication of unprotected Beacon frames in addition to the previously used Deauthentication and Disassociation frames. The Beacon frame case is quite similar, but has couple of exceptions: this is used both with fully unprotected and also incorrectly protected frames and there is a rate limit on the events to avoid unnecessary flooding netlink events in case something goes wrong. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200401142548.6990-1-jouni@codeaurora.org [add missing kernel-doc] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2b691161830f..afdd9802ccb8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1151,6 +1151,11 @@ * @NL80211_CMD_SET_TID_CONFIG: Data frame TID specific configuration * is passed using %NL80211_ATTR_TID_CONFIG attribute. * + * @NL80211_CMD_UNPROT_BEACON: Unprotected or incorrectly protected Beacon + * frame. This event is used to indicate that a received Beacon frame was + * dropped because it did not include a valid MME MIC while beacon + * protection was enabled (BIGTK configured in station mode). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1377,6 +1382,8 @@ enum nl80211_commands { NL80211_CMD_SET_TID_CONFIG, + NL80211_CMD_UNPROT_BEACON, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 9dba48a6ece79da064655736dc7347a5fcadedef Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2020 12:40:15 +0200 Subject: cfg80211: support multicast RX registration For DPP, there's a need to receive multicast action frames, but many drivers need a special filter configuration for this. Support announcing from userspace in the management registration that multicast RX is required, with an extended feature flag if the driver handles this. Signed-off-by: Johannes Berg Reviewed-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200417124013.c46238801048.Ib041d437ce0bff28a0c6d5dc915f68f1d8591002@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index afdd9802ccb8..e0dc89eceab8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -687,6 +687,10 @@ * four bytes for vendor frames including the OUI. The registration * cannot be dropped, but is removed automatically when the netlink * socket is closed. Multiple registrations can be made. + * The %NL80211_ATTR_RECEIVE_MULTICAST flag attribute can be given if + * %NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS is available, in which + * case the registration can also be modified to include/exclude the + * flag, rather than requiring unregistration to change it. * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for * backward compatibility * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This @@ -2477,6 +2481,9 @@ enum nl80211_commands { * no roaming occurs between the reauth threshold and PMK expiration, * disassociation is still forced. * + * @NL80211_ATTR_RECEIVE_MULTICAST: multicast flag for the + * %NL80211_CMD_REGISTER_FRAME command, see the description there. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2952,6 +2959,8 @@ enum nl80211_attrs { NL80211_ATTR_PMK_LIFETIME, NL80211_ATTR_PMK_REAUTH_THRESHOLD, + NL80211_ATTR_RECEIVE_MULTICAST, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5691,6 +5700,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_DEL_IBSS_STA: The driver supports removing stations * in IBSS mode, essentially by dropping their state. * + * @NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS: management frame registrations + * are possible for multicast frames and those will be reported properly. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5742,6 +5754,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH, NL80211_EXT_FEATURE_PROTECTED_TWT, NL80211_EXT_FEATURE_DEL_IBSS_STA, + NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 155d7c733807190258639c66b36340948f369349 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Apr 2020 14:06:00 +0200 Subject: nl80211: allow client-only BIGTK support The current NL80211_EXT_FEATURE_BEACON_PROTECTION feature flag requires both AP and client support, add a new one called NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT that enables only support in client (and P2P-client) modes. Link: https://lore.kernel.org/r/20200420140559.6ba704053a5a.Ifeb869fb0b48e52fe0cb9c15572b93ac8a924f8d@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e0dc89eceab8..9679d561f7d0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5690,6 +5690,8 @@ enum nl80211_feature_flags { * * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection * and can receive key configuration for BIGTK using key indexes 6 and 7. + * @NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT: The driver supports Beacon + * protection as a client only and cannot transmit protected beacons. * * @NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH: The driver can disable the * forwarding of preauth frames over the control port. They are then @@ -5755,6 +5757,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_PROTECTED_TWT, NL80211_EXT_FEATURE_DEL_IBSS_STA, NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From acd05785e48c01edb2c4f4d014d28478b5f19fb5 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 17 Apr 2020 15:14:46 -0700 Subject: kvm: add capability for halt polling KVM_CAP_HALT_POLL is a per-VM capability that lets userspace control the halt-polling time, allowing halt-polling to be tuned or disabled on particular VMs. With dynamic halt-polling, a VM's VCPUs can poll from anywhere from [0, halt_poll_ns] on each halt. KVM_CAP_HALT_POLL sets the upper limit on the poll time. Signed-off-by: David Matlack Signed-off-by: Jon Cargille Reviewed-by: Jim Mattson Message-Id: <20200417221446.108733-1-jcargill@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 428c7dde6b4b..ac9eba0289d1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_VCPU_RESETS 179 #define KVM_CAP_S390_PROTECTED 180 #define KVM_CAP_PPC_SECURE_GUEST 181 +#define KVM_CAP_HALT_POLL 182 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 71d19214776e61b33da48f7c1b46e522c7f78221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 26 Apr 2020 09:15:25 -0700 Subject: bpf: add bpf_ktime_get_boot_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a device like a cellphone which is constantly suspending and resuming CLOCK_MONOTONIC is not particularly useful for keeping track of or reacting to external network events. Instead you want to use CLOCK_BOOTTIME. Hence add bpf_ktime_get_boot_ns() as a mirror of bpf_ktime_get_ns() based around CLOCK_BOOTTIME instead of CLOCK_MONOTONIC. Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bbf1b65be10..4a6c47f3febe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -652,6 +652,8 @@ union bpf_attr { * u64 bpf_ktime_get_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. + * Does not include time the system was suspended. + * See: clock_gettime(CLOCK_MONOTONIC) * Return * Current *ktime*. * @@ -3025,6 +3027,14 @@ union bpf_attr { * * **-EOPNOTSUPP** Unsupported operation, for example a * call from outside of TC ingress. * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * + * u64 bpf_ktime_get_boot_ns(void) + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Does include the time the system was suspended. + * See: clock_gettime(CLOCK_BOOTTIME) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3151,7 +3161,8 @@ union bpf_attr { FN(xdp_output), \ FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), + FN(sk_assign), \ + FN(ktime_get_boot_ns), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 74f99482eae03195ced512b440b31d62bdb6e943 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 21 Apr 2020 10:04:16 -0500 Subject: netfilter: nf_conntrack: add IPS_HW_OFFLOAD status bit This bit indicates that the conntrack entry is offloaded to hardware flow table. nf_conntrack entry will be tagged with [HW_OFFLOAD] if it's offload to hardware. cat /proc/net/nf_conntrack ipv4 2 tcp 6 \ src=1.1.1.17 dst=1.1.1.16 sport=56394 dport=5001 \ src=1.1.1.16 dst=1.1.1.17 sport=5001 dport=56394 [HW_OFFLOAD] \ mark=0 zone=0 use=3 Note that HW_OFFLOAD/OFFLOAD/ASSURED are mutually exclusive. Changelog: * V1->V2: - Remove check of lastused from stats. It was meant for cases such as removing driver module while traffic still running. Better to handle such cases from garbage collector. Signed-off-by: Bodong Wang Reviewed-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index b6f0bb1dc799..4b3395082d15 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -114,15 +114,19 @@ enum ip_conntrack_status { IPS_OFFLOAD_BIT = 14, IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), + /* Conntrack has been offloaded to hardware. */ + IPS_HW_OFFLOAD_BIT = 15, + IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT), + /* Be careful here, modifying these bits can make things messy, * so don't let users modify them directly. */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED | - IPS_OFFLOAD), + IPS_OFFLOAD | IPS_HW_OFFLOAD), - __IPS_MAX_BIT = 15, + __IPS_MAX_BIT = 16, }; /* Connection tracking event types */ -- cgit v1.2.3 From 4714d13791f831d253852c8b5d657270becb8b2a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 26 Apr 2020 15:21:58 +0200 Subject: bridge: uapi: mrp: Add mrp attributes. Add new nested netlink attribute to configure the MRP. These attributes are used by the userspace to add/delete/configure MRP instances and by the kernel to notify the userspace when the MRP ring gets open/closed. MRP nested attribute has the following attributes: IFLA_BRIDGE_MRP_INSTANCE - the parameter type is br_mrp_instance which contains the instance id, and the ifindex of the two ports. The ports can't be part of multiple instances. This is used to create/delete MRP instances. IFLA_BRIDGE_MRP_PORT_STATE - the parameter type is u32. Which can be forwarding, blocking or disabled. IFLA_BRIDGE_MRP_PORT_ROLE - the parameter type is br_mrp_port_role which contains the instance id and the role. The role can be primary or secondary. IFLA_BRIDGE_MRP_RING_STATE - the parameter type is br_mrp_ring_state which contains the instance id and the state. The state can be open or closed. IFLA_BRIDGE_MRP_RING_ROLE - the parameter type is br_mrp_ring_role which contains the instance id and the ring role. The role can be MRM or MRC. IFLA_BRIDGE_MRP_START_TEST - the parameter type is br_mrp_start_test which contains the instance id, the interval at which to send the MRP_Test frames, how many test frames can be missed before declaring the ring open and the period which represent for how long to send the test frames. Also add the file include/uapi/linux/mrp_bridge.h which defines all the types used by MRP that are also needed by the userpace. Reviewed-by: Nikolay Aleksandrov Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 42 +++++++++++++++++++++ include/uapi/linux/if_ether.h | 1 + include/uapi/linux/mrp_bridge.h | 84 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 include/uapi/linux/mrp_bridge.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index bfe621ea51b3..bd8c95488f16 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -120,6 +120,7 @@ enum { IFLA_BRIDGE_MODE, IFLA_BRIDGE_VLAN_INFO, IFLA_BRIDGE_VLAN_TUNNEL_INFO, + IFLA_BRIDGE_MRP, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -157,6 +158,47 @@ struct bridge_vlan_xstats { __u32 pad2; }; +enum { + IFLA_BRIDGE_MRP_UNSPEC, + IFLA_BRIDGE_MRP_INSTANCE, + IFLA_BRIDGE_MRP_PORT_STATE, + IFLA_BRIDGE_MRP_PORT_ROLE, + IFLA_BRIDGE_MRP_RING_STATE, + IFLA_BRIDGE_MRP_RING_ROLE, + IFLA_BRIDGE_MRP_START_TEST, + __IFLA_BRIDGE_MRP_MAX, +}; + +struct br_mrp_instance { + __u32 ring_id; + __u32 p_ifindex; + __u32 s_ifindex; +}; + +struct br_mrp_port_role { + __u32 ring_id; + __u32 role; +}; + +struct br_mrp_ring_state { + __u32 ring_id; + __u32 ring_state; +}; + +struct br_mrp_ring_role { + __u32 ring_id; + __u32 ring_role; +}; + +struct br_mrp_start_test { + __u32 ring_id; + __u32 interval; + __u32 max_miss; + __u32 period; +}; + +#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index f6ceb2e63d1e..d6de2b167448 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -92,6 +92,7 @@ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */ +#define ETH_P_MRP 0x88E3 /* Media Redundancy Protocol */ #define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h new file mode 100644 index 000000000000..2600cdf5a284 --- /dev/null +++ b/include/uapi/linux/mrp_bridge.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_MRP_BRIDGE_H_ +#define _UAPI_LINUX_MRP_BRIDGE_H_ + +#include +#include + +#define MRP_MAX_FRAME_LENGTH 200 +#define MRP_DEFAULT_PRIO 0x8000 +#define MRP_DOMAIN_UUID_LENGTH 16 +#define MRP_VERSION 1 +#define MRP_FRAME_PRIO 7 + +enum br_mrp_ring_role_type { + BR_MRP_RING_ROLE_DISABLED, + BR_MRP_RING_ROLE_MRC, + BR_MRP_RING_ROLE_MRM, +}; + +enum br_mrp_ring_state_type { + BR_MRP_RING_STATE_OPEN, + BR_MRP_RING_STATE_CLOSED, +}; + +enum br_mrp_port_state_type { + BR_MRP_PORT_STATE_DISABLED, + BR_MRP_PORT_STATE_BLOCKED, + BR_MRP_PORT_STATE_FORWARDING, + BR_MRP_PORT_STATE_NOT_CONNECTED, +}; + +enum br_mrp_port_role_type { + BR_MRP_PORT_ROLE_PRIMARY, + BR_MRP_PORT_ROLE_SECONDARY, + BR_MRP_PORT_ROLE_NONE, +}; + +enum br_mrp_tlv_header_type { + BR_MRP_TLV_HEADER_END = 0x0, + BR_MRP_TLV_HEADER_COMMON = 0x1, + BR_MRP_TLV_HEADER_RING_TEST = 0x2, + BR_MRP_TLV_HEADER_RING_TOPO = 0x3, + BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4, + BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5, +}; + +struct br_mrp_tlv_hdr { + __u8 type; + __u8 length; +}; + +struct br_mrp_end_hdr { + struct br_mrp_tlv_hdr hdr; +}; + +struct br_mrp_common_hdr { + __u16 seq_id; + __u8 domain[MRP_DOMAIN_UUID_LENGTH]; +}; + +struct br_mrp_ring_test_hdr { + __u16 prio; + __u8 sa[ETH_ALEN]; + __u16 port_role; + __u16 state; + __u16 transitions; + __u32 timestamp; +}; + +struct br_mrp_ring_topo_hdr { + __u16 prio; + __u8 sa[ETH_ALEN]; + __u16 interval; +}; + +struct br_mrp_ring_link_hdr { + __u8 sa[ETH_ALEN]; + __u16 port_role; + __u16 interval; + __u16 blocked; +}; + +#endif -- cgit v1.2.3 From 3e54442c93845316762b1b3c75e654463fd1b715 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 26 Apr 2020 15:22:01 +0200 Subject: net: bridge: Add port attribute IFLA_BRPORT_MRP_RING_OPEN This patch adds a new port attribute, IFLA_BRPORT_MRP_RING_OPEN, which allows to notify the userspace when the port lost the continuite of MRP frames. This attribute is set by kernel whenever the SW or HW detects that the ring is being open or closed. Reviewed-by: Nikolay Aleksandrov Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 127c704eeba9..a009365ad67b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -343,6 +343,7 @@ enum { IFLA_BRPORT_NEIGH_SUPPRESS, IFLA_BRPORT_ISOLATED, IFLA_BRPORT_BACKUP_PORT, + IFLA_BRPORT_MRP_RING_OPEN, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From 9d2161bed4e39ef7a5e5f18f69c4a57d001051b9 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 22 Apr 2020 17:37:04 -0400 Subject: audit: log audit netlink multicast bind and unbind Log information about programs connecting to and disconnecting from the audit netlink multicast socket. This is needed so that during investigations a security officer can tell who or what had access to the audit trail. This helps to meet the FAU_SAR.2 requirement for Common Criteria. Here is the systemd startup event: type=PROCTITLE msg=audit(2020-04-22 10:10:21.787:10) : proctitle=/init type=SYSCALL msg=audit(2020-04-22 10:10:21.787:10) : arch=x86_64 syscall=bind success=yes exit=0 a0=0x19 a1=0x555f4aac7e90 a2=0xc a3=0x7ffcb792ff44 items=0 ppid=0 pid=1 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=systemd exe=/usr/lib/systemd/systemd subj=kernel key=(null) type=UNKNOWN[1335] msg=audit(2020-04-22 10:10:21.787:10) : pid=1 uid=root auid=unset tty=(none) ses=unset subj=kernel comm=systemd exe=/usr/lib/systemd/systemd nl-mcgrp=1 op=connect res=yes And events from the test suite that just uses close(): type=PROCTITLE msg=audit(2020-04-22 11:47:08.501:442) : proctitle=/usr/bin/perl -w amcast_joinpart/test type=SYSCALL msg=audit(2020-04-22 11:47:08.501:442) : arch=x86_64 syscall=bind success=yes exit=0 a0=0x7 a1=0x563004378760 a2=0xc a3=0x0 items=0 ppid=815 pid=818 auid=root uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=ttyS0 ses=1 comm=perl exe=/usr/bin/perl subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1335] msg=audit(2020-04-22 11:47:08.501:442) : pid=818 uid=root auid=root tty=ttyS0 ses=1 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 comm=perl exe=/usr/bin/perl nl-mcgrp=1 op=connect res=yes type=UNKNOWN[1335] msg=audit(2020-04-22 11:47:08.501:443) : pid=818 uid=root auid=root tty=ttyS0 ses=1 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 comm=perl exe=/usr/bin/perl nl-mcgrp=1 op=disconnect res=yes And the events from the test suite using setsockopt with NETLINK_DROP_MEMBERSHIP: type=PROCTITLE msg=audit(2020-04-22 11:39:53.291:439) : proctitle=/usr/bin/perl -w amcast_joinpart/test type=SYSCALL msg=audit(2020-04-22 11:39:53.291:439) : arch=x86_64 syscall=bind success=yes exit=0 a0=0x7 a1=0x5560877c2d20 a2=0xc a3=0x0 items=0 ppid=772 pid=775 auid=root uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=ttyS0 ses=1 comm=perl exe=/usr/bin/perl subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1335] msg=audit(2020-04-22 11:39:53.291:439) : pid=775 uid=root auid=root tty=ttyS0 ses=1 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 comm=perl exe=/usr/bin/perl nl-mcgrp=1 op=connect res=yes type=PROCTITLE msg=audit(2020-04-22 11:39:53.292:440) : proctitle=/usr/bin/perl -w amcast_joinpart/test type=SYSCALL msg=audit(2020-04-22 11:39:53.292:440) : arch=x86_64 syscall=setsockopt success=yes exit=0 a0=0x7 a1=SOL_NETLINK a2=0x2 a3=0x7ffc8366f000 items=0 ppid=772 pid=775 auid=root uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=ttyS0 ses=1 comm=perl exe=/usr/bin/perl subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1335] msg=audit(2020-04-22 11:39:53.292:440) : pid=775 uid=root auid=root tty=ttyS0 ses=1 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 comm=perl exe=/usr/bin/perl nl-mcgrp=1 op=disconnect res=yes Please see the upstream issue tracker at https://github.com/linux-audit/audit-kernel/issues/28 With the feature description at https://github.com/linux-audit/audit-kernel/wiki/RFE-Audit-Multicast-Socket-Join-Part The testsuite support is at https://github.com/rgbriggs/audit-testsuite/compare/ghak28-mcast-part-join https://github.com/linux-audit/audit-testsuite/pull/93 And the userspace support patch is at https://github.com/linux-audit/audit-userspace/pull/114 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index a534d71e689a..9b6a973f4cc3 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -117,6 +117,7 @@ #define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ +#define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.3 From 3ff7ddb1353da9b535e65702704cbadea1da9a00 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2020 21:55:37 +0200 Subject: netfilter: nft_nat: add netmap support This patch allows you to NAT the network address prefix onto another network address prefix, a.k.a. netmapping. Userspace must specify the NF_NAT_RANGE_NETMAP flag and the prefix address through the NFTA_NAT_REG_ADDR_MIN and NFTA_NAT_REG_ADDR_MAX netlink attributes. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_nat.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index 4a95c0db14d4..a64586e77b24 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -11,6 +11,7 @@ #define NF_NAT_RANGE_PERSISTENT (1 << 3) #define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) #define NF_NAT_RANGE_PROTO_OFFSET (1 << 5) +#define NF_NAT_RANGE_NETMAP (1 << 6) #define NF_NAT_RANGE_PROTO_RANDOM_ALL \ (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) @@ -18,7 +19,8 @@ #define NF_NAT_RANGE_MASK \ (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \ NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \ - NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET) + NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \ + NF_NAT_RANGE_NETMAP) struct nf_nat_ipv4_range { unsigned int flags; -- cgit v1.2.3 From 5bb4b78be9c67b02a7f138850e9e89825181f555 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 22:11:14 -0500 Subject: drm/amdkfd: New IOCTL to allocate queue GWS (v2) Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on queue destroy. v2: re-introduce this API with the following fixes squashed in: - drm/amdkfd: fix null pointer dereference on dev - drm/amdkfd: Return proper error code for gws alloc API - drm/amdkfd: Remove GPU ID in GWS queue creation Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 20917c59f39c..4f6676428c5c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +/* Allocate GWS for specific queue + * + * @queue_id: queue's id that GWS is allocated for + * @num_gws: how many GWS to allocate + * @first_gws: index of the first GWS allocated. + * only support contiguous GWS allocation + */ +struct kfd_ioctl_alloc_queue_gws_args { + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ + __u32 pad; +}; + struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -529,7 +543,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1E +#define AMDKFD_COMMAND_END 0x1F #endif -- cgit v1.2.3 From a3b80e1078943dc12553166fb08e258463dec013 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:06 -0700 Subject: bpf: Allocate ID for bpf_link Generate ID for each bpf_link using IDR, similarly to bpf_map and bpf_prog. bpf_link creation, initialization, attachment, and exposing to user-space through FD and ID is a complicated multi-step process, abstract it away through bpf_link_primer and bpf_link_prime(), bpf_link_settle(), and bpf_link_cleanup() internal API. They guarantee that until bpf_link is properly attached, user-space won't be able to access partially-initialized bpf_link either from FD or ID. All this allows to simplify bpf_link attachment and error handling code. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-3-andriin@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a6c47f3febe..6121aa487465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -523,6 +523,7 @@ union bpf_attr { __u32 prog_id; __u32 map_id; __u32 btf_id; + __u32 link_id; }; __u32 next_id; __u32 open_flags; -- cgit v1.2.3 From 2d602c8cf40d65d4a7ac34fe18648d8778e6e594 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:07 -0700 Subject: bpf: Support GET_FD_BY_ID and GET_NEXT_ID for bpf_link Add support to look up bpf_link by ID and iterate over all existing bpf_links in the system. GET_FD_BY_ID code handles not-yet-ready bpf_link by checking that its ID hasn't been set to non-zero value yet. Setting bpf_link's ID is done as the very last step in finalizing bpf_link, together with installing FD. This approach allows users of bpf_link in kernel code to not worry about races between user-space and kernel code that hasn't finished attaching and initializing bpf_link. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-4-andriin@fb.com --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6121aa487465..7e6541fceade 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -113,6 +113,8 @@ enum bpf_cmd { BPF_MAP_DELETE_BATCH, BPF_LINK_CREATE, BPF_LINK_UPDATE, + BPF_LINK_GET_FD_BY_ID, + BPF_LINK_GET_NEXT_ID, }; enum bpf_map_type { -- cgit v1.2.3 From f2e10bff16a0fdd41ba278c84da9813700e356af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:08 -0700 Subject: bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link Add ability to fetch bpf_link details through BPF_OBJ_GET_INFO_BY_FD command. Also enhance show_fdinfo to potentially include bpf_link type-specific information (similarly to obj_info). Also introduce enum bpf_link_type stored in bpf_link itself and expose it in UAPI. bpf_link_tracing also now will store and return bpf_attach_type. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-5-andriin@fb.com --- include/uapi/linux/bpf.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7e6541fceade..0eccafae55bb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -222,6 +222,15 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +enum bpf_link_type { + BPF_LINK_TYPE_UNSPEC = 0, + BPF_LINK_TYPE_RAW_TRACEPOINT = 1, + BPF_LINK_TYPE_TRACING = 2, + BPF_LINK_TYPE_CGROUP = 3, + + MAX_BPF_LINK_TYPE, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -3612,6 +3621,25 @@ struct bpf_btf_info { __u32 id; } __attribute__((aligned(8))); +struct bpf_link_info { + __u32 type; + __u32 id; + __u32 prog_id; + union { + struct { + __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ + __u32 tp_name_len; /* in/out: tp_name buffer len */ + } raw_tracepoint; + struct { + __u32 attach_type; + } tracing; + struct { + __u64 cgroup_id; + __u32 attach_type; + } cgroup; + }; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach attach type). -- cgit v1.2.3 From 97f9ac3db6612f14ac0c509e1a63ce14fd4cc0eb Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 21 Apr 2020 12:44:49 -0500 Subject: crypto: ccp - Add support for SEV-ES to the PSP driver To provide support for SEV-ES, the hypervisor must provide an area of memory to the PSP. Once this Trusted Memory Region (TMR) is provided to the PSP, the contents of this area of memory are no longer available to the x86. Update the PSP driver to allocate a 1MB region for the TMR that is 1MB aligned and then provide it to the PSP through the SEV INIT command. Signed-off-by: Tom Lendacky Reviewed-by: Brijesh Singh Reviewed-by: Joerg Roedel Signed-off-by: Herbert Xu --- include/uapi/linux/psp-sev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 0549a5c622bf..91b4c63d5cbf 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -83,6 +83,8 @@ struct sev_user_data_status { __u32 guest_count; /* Out */ } __packed; +#define SEV_STATUS_FLAGS_CONFIG_ES 0x0100 + /** * struct sev_user_data_pek_csr - PEK_CSR command parameters * -- cgit v1.2.3 From 6e3a401fc8af01828bcdc92d713195d318b36e7e Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 30 Apr 2020 18:51:14 +0300 Subject: inet_diag: add cgroup id attribute This patch adds cgroup v2 ID to common inet diag message attributes. Cgroup v2 ID is kernfs ID (ino or ino+gen). This attribute allows filter inet diag output by cgroup ID obtained by name_to_handle_at() syscall. When net_cls or net_prio cgroup is activated this ID is equal to 1 (root cgroup ID) for newly created sockets. Some notes about this ID: 1) gets initialized in socket() syscall 2) incoming socket gets ID from listening socket (not during accept() syscall) 3) not changed when process get moved to another cgroup 4) can point to deleted cgroup (refcounting) v2: - use CONFIG_SOCK_CGROUP_DATA instead if CONFIG_CGROUPS v3: - fix attr size by using nla_total_size_64bit() (Eric Dumazet) - more detailed commit message (Konstantin Khlebnikov) Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Acked-By: Tejun Heo Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 57cc429a9177..c9b1e551792c 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -157,6 +157,7 @@ enum { INET_DIAG_MD5SIG, INET_DIAG_ULP_INFO, INET_DIAG_SK_BPF_STORAGES, + INET_DIAG_CGROUP_ID, __INET_DIAG_MAX, }; -- cgit v1.2.3 From b1f3e43dbfacfcd95296b0f80f84b186add9ef54 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 30 Apr 2020 18:51:15 +0300 Subject: inet_diag: add support for cgroup filter This patch adds ability to filter sockets based on cgroup v2 ID. Such filter is helpful in ss utility for filtering sockets by cgroup pathname. Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index c9b1e551792c..e6f183ee8417 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -96,6 +96,7 @@ enum { INET_DIAG_BC_MARK_COND, INET_DIAG_BC_S_EQ, INET_DIAG_BC_D_EQ, + INET_DIAG_BC_CGROUP_COND, /* u64 cgroup v2 ID */ }; struct inet_diag_hostcond { -- cgit v1.2.3 From 06bfa47e72c83550fefc93c62a1ace5fff72e212 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 30 Apr 2020 18:04:31 +0200 Subject: docs: networking: convert timestamping.txt to ReST - add SPDX header; - add a document title; - adjust titles and chapters, adding proper markups; - mark code blocks and literals as such; - adjust identation, whitespaces and blank lines where needed; - add to networking/index.rst. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. Miller --- include/uapi/linux/errqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 0cca19670fd2..ca5cb3e3c6df 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -36,7 +36,7 @@ struct sock_extended_err { * * The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_* * communicate network timestamps by passing this struct in a cmsg with - * recvmsg(). See Documentation/networking/timestamping.txt for details. + * recvmsg(). See Documentation/networking/timestamping.rst for details. * User space sees a timespec definition that matches either * __kernel_timespec or __kernel_old_timespec, in the kernel we * require two structure definitions to provide both. -- cgit v1.2.3 From d07dcf9aadd6b2842b439e8668ff7ea2873f28d7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Apr 2020 22:13:12 +0200 Subject: netlink: add infrastructure to expose policies to userspace Add, and use in generic netlink, helpers to dump out a netlink policy to userspace, including all the range validation data, nested policies etc. This lets userspace discover what the kernel understands. For families/commands other than generic netlink, the helpers need to be used directly in an appropriate command, or we can add some infrastructure (a new netlink family) that those can register their policies with for introspection. I'm not that familiar with non-generic netlink, so that's left out for now. The data exposed to userspace also includes min and max length for binary/string data, I've done that instead of letting the userspace tools figure out whether min/max is intended based on the type so that we can extend this later in the kernel, we might want to just use the range data for example. Because of this, I opted to not directly expose the NLA_* values, even if some of them are already exposed via BPF, as with min/max length we don't need to have different types here for NLA_BINARY/NLA_MIN_LEN/NLA_EXACT_LEN, we just make them all NL_ATTR_TYPE_BINARY with min/max length optionally set. Similarly, we don't really need NLA_MSECS, and perhaps can remove it in the future - but not if we encode it into the userspace API now. It gets mapped to NL_ATTR_TYPE_U64 here. Note that the exposing here corresponds to the strict policy interpretation, and NLA_UNSPEC items are omitted entirely. To get those, change them to NLA_MIN_LEN which behaves in exactly the same way, but is exposed. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/genetlink.h | 2 + include/uapi/linux/netlink.h | 103 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 877f7fa95466..9c0636ec2286 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -48,6 +48,7 @@ enum { CTRL_CMD_NEWMCAST_GRP, CTRL_CMD_DELMCAST_GRP, CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, __CTRL_CMD_MAX, }; @@ -62,6 +63,7 @@ enum { CTRL_ATTR_MAXATTR, CTRL_ATTR_OPS, CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, __CTRL_ATTR_MAX, }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 0a4d73317759..eac8a6a648ea 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -249,4 +249,107 @@ struct nla_bitfield32 { __u32 selector; }; +/* + * policy descriptions - it's specific to each family how this is used + * Normally, it should be retrieved via a dump inside another attribute + * specifying where it applies. + */ + +/** + * enum netlink_attribute_type - type of an attribute + * @NL_ATTR_TYPE_INVALID: unused + * @NL_ATTR_TYPE_FLAG: flag attribute (present/not present) + * @NL_ATTR_TYPE_U8: 8-bit unsigned attribute + * @NL_ATTR_TYPE_U16: 16-bit unsigned attribute + * @NL_ATTR_TYPE_U32: 32-bit unsigned attribute + * @NL_ATTR_TYPE_U64: 64-bit unsigned attribute + * @NL_ATTR_TYPE_S8: 8-bit signed attribute + * @NL_ATTR_TYPE_S16: 16-bit signed attribute + * @NL_ATTR_TYPE_S32: 32-bit signed attribute + * @NL_ATTR_TYPE_S64: 64-bit signed attribute + * @NL_ATTR_TYPE_BINARY: binary data, min/max length may be specified + * @NL_ATTR_TYPE_STRING: string, min/max length may be specified + * @NL_ATTR_TYPE_NUL_STRING: NUL-terminated string, + * min/max length may be specified + * @NL_ATTR_TYPE_NESTED: nested, i.e. the content of this attribute + * consists of sub-attributes. The nested policy and maxtype + * inside may be specified. + * @NL_ATTR_TYPE_NESTED_ARRAY: nested array, i.e. the content of this + * attribute contains sub-attributes whose type is irrelevant + * (just used to separate the array entries) and each such array + * entry has attributes again, the policy for those inner ones + * and the corresponding maxtype may be specified. + * @NL_ATTR_TYPE_BITFIELD32: &struct nla_bitfield32 attribute + */ +enum netlink_attribute_type { + NL_ATTR_TYPE_INVALID, + + NL_ATTR_TYPE_FLAG, + + NL_ATTR_TYPE_U8, + NL_ATTR_TYPE_U16, + NL_ATTR_TYPE_U32, + NL_ATTR_TYPE_U64, + + NL_ATTR_TYPE_S8, + NL_ATTR_TYPE_S16, + NL_ATTR_TYPE_S32, + NL_ATTR_TYPE_S64, + + NL_ATTR_TYPE_BINARY, + NL_ATTR_TYPE_STRING, + NL_ATTR_TYPE_NUL_STRING, + + NL_ATTR_TYPE_NESTED, + NL_ATTR_TYPE_NESTED_ARRAY, + + NL_ATTR_TYPE_BITFIELD32, +}; + +/** + * enum netlink_policy_type_attr - policy type attributes + * @NL_POLICY_TYPE_ATTR_UNSPEC: unused + * @NL_POLICY_TYPE_ATTR_TYPE: type of the attribute, + * &enum netlink_attribute_type (U32) + * @NL_POLICY_TYPE_ATTR_MIN_VALUE_S: minimum value for signed + * integers (S64) + * @NL_POLICY_TYPE_ATTR_MAX_VALUE_S: maximum value for signed + * integers (S64) + * @NL_POLICY_TYPE_ATTR_MIN_VALUE_U: minimum value for unsigned + * integers (U64) + * @NL_POLICY_TYPE_ATTR_MAX_VALUE_U: maximum value for unsigned + * integers (U64) + * @NL_POLICY_TYPE_ATTR_MIN_LENGTH: minimum length for binary + * attributes, no minimum if not given (U32) + * @NL_POLICY_TYPE_ATTR_MAX_LENGTH: maximum length for binary + * attributes, no maximum if not given (U32) + * @NL_POLICY_TYPE_ATTR_POLICY_IDX: sub policy for nested and + * nested array types (U32) + * @NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE: maximum sub policy + * attribute for nested and nested array types, this can + * in theory be < the size of the policy pointed to by + * the index, if limited inside the nesting (U32) + * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the + * bitfield32 type (U32) + * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment + */ +enum netlink_policy_type_attr { + NL_POLICY_TYPE_ATTR_UNSPEC, + NL_POLICY_TYPE_ATTR_TYPE, + NL_POLICY_TYPE_ATTR_MIN_VALUE_S, + NL_POLICY_TYPE_ATTR_MAX_VALUE_S, + NL_POLICY_TYPE_ATTR_MIN_VALUE_U, + NL_POLICY_TYPE_ATTR_MAX_VALUE_U, + NL_POLICY_TYPE_ATTR_MIN_LENGTH, + NL_POLICY_TYPE_ATTR_MAX_LENGTH, + NL_POLICY_TYPE_ATTR_POLICY_IDX, + NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE, + NL_POLICY_TYPE_ATTR_BITFIELD32_MASK, + NL_POLICY_TYPE_ATTR_PAD, + + /* keep last */ + __NL_POLICY_TYPE_ATTR_MAX, + NL_POLICY_TYPE_ATTR_MAX = __NL_POLICY_TYPE_ATTR_MAX - 1 +}; + #endif /* _UAPI__LINUX_NETLINK_H */ -- cgit v1.2.3 From 0aeaaf64e6d06e353de15dcf9973312ae0672ca1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 29 Apr 2020 19:36:06 -0400 Subject: drm/amdkfd: Fix comment formatting Corrected two function names. Added a missing space. Signed-off-by: Felix Kuehling Reviewed-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4f6676428c5c..b6be62356d34 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -251,7 +251,7 @@ struct kfd_memory_exception_failure { __u32 imprecise; /* Can't determine the exact fault address */ }; -/* memory exception data*/ +/* memory exception data */ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; -- cgit v1.2.3 From d46edd671a147032e22cfeb271a5734703093649 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 30 Apr 2020 00:15:04 -0700 Subject: bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS Currently, sysctl kernel.bpf_stats_enabled controls BPF runtime stats. Typical userspace tools use kernel.bpf_stats_enabled as follows: 1. Enable kernel.bpf_stats_enabled; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Disable kernel.bpf_stats_enabled. The problem with this approach is that only one userspace tool can toggle this sysctl. If multiple tools toggle the sysctl at the same time, the measurement may be inaccurate. To fix this problem while keep backward compatibility, introduce a new bpf command BPF_ENABLE_STATS. On success, this command enables stats and returns a valid fd. BPF_ENABLE_STATS takes argument "type". Currently, only one type, BPF_STATS_RUN_TIME, is supported. We can extend the command to support other types of stats in the future. With BPF_ENABLE_STATS, user space tool would have the following flow: 1. Get a fd with BPF_ENABLE_STATS, and make sure it is valid; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Close the fd. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200430071506.1408910-2-songliubraving@fb.com --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0eccafae55bb..705e4822f997 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -115,6 +115,7 @@ enum bpf_cmd { BPF_LINK_UPDATE, BPF_LINK_GET_FD_BY_ID, BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, }; enum bpf_map_type { @@ -390,6 +391,12 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* type for BPF_ENABLE_STATS */ +enum bpf_stats_type { + /* enabled run_time_ns and run_cnt */ + BPF_STATS_RUN_TIME = 0, +}; + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -601,6 +608,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 883780af72090daf9ab53779a3085a6ddfc468ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 May 2020 16:44:27 +0200 Subject: docs: networking: convert x25-iface.txt to ReST Not much to be done here: - add SPDX header; - adjust title markup; - remove a tail whitespace; - add to networking/index.rst. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. Miller --- include/uapi/linux/if_x25.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_x25.h b/include/uapi/linux/if_x25.h index 5d962448345f..3a5938e38370 100644 --- a/include/uapi/linux/if_x25.h +++ b/include/uapi/linux/if_x25.h @@ -18,7 +18,7 @@ #include -/* Documentation/networking/x25-iface.txt */ +/* Documentation/networking/x25-iface.rst */ #define X25_IFACE_DATA 0x00 #define X25_IFACE_CONNECT 0x01 #define X25_IFACE_DISCONNECT 0x02 -- cgit v1.2.3 From beecf11bc2188067824591612151c4dc6ec383c7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 30 Apr 2020 16:31:52 -0700 Subject: bpf: Bpf_{g,s}etsockopt for struct bpf_sock_addr Currently, bpf_getsockopt and bpf_setsockopt helpers operate on the 'struct bpf_sock_ops' context in BPF_PROG_TYPE_SOCK_OPS program. Let's generalize them and make them available for 'struct bpf_sock_addr'. That way, in the future, we can allow those helpers in more places. As an example, let's expose those 'struct bpf_sock_addr' based helpers to BPF_CGROUP_INET{4,6}_CONNECT hooks. That way we can override CC before the connection is made. v3: * Expose custom helpers for bpf_sock_addr context instead of doing generic bpf_sock argument (as suggested by Daniel). Even with try_socket_lock that doesn't sleep we have a problem where context sk is already locked and socket lock is non-nestable. v2: * s/BPF_PROG_TYPE_CGROUP_SOCKOPT/BPF_PROG_TYPE_SOCK_OPS/ Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200430233152.199403-1-sdf@google.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 705e4822f997..b3643e27e264 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1587,7 +1587,7 @@ union bpf_attr { * Return * 0 * - * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1595,6 +1595,11 @@ union bpf_attr { * must be specified, see **setsockopt(2)** for more information. * The option value of length *optlen* is pointed by *optval*. * + * *bpf_socket* should be one of the following: + * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** + * and **BPF_CGROUP_INET6_CONNECT**. + * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: * @@ -1789,7 +1794,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1798,6 +1803,11 @@ union bpf_attr { * The retrieved value is stored in the structure pointed by * *opval* and of length *optlen*. * + * *bpf_socket* should be one of the following: + * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** + * and **BPF_CGROUP_INET6_CONNECT**. + * * This helper actually implements a subset of **getsockopt()**. * It supports the following *level*\ s: * -- cgit v1.2.3 From a51c328df3106663879645680609eb49b3ff6444 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Fri, 1 May 2020 08:53:15 +0800 Subject: net: qos: introduce a gate control flow action Introduce a ingress frame gate control flow action. Tc gate action does the work like this: Assume there is a gate allow specified ingress frames can be passed at specific time slot, and be dropped at specific time slot. Tc filter chooses the ingress frames, and tc gate action would specify what slot does these frames can be passed to device and what time slot would be dropped. Tc gate action would provide an entry list to tell how much time gate keep open and how much time gate keep state close. Gate action also assign a start time to tell when the entry list start. Then driver would repeat the gate entry list cyclically. For the software simulation, gate action requires the user assign a time clock type. Below is the setting example in user space. Tc filter a stream source ip address is 192.168.0.20 and gate action own two time slots. One is last 200ms gate open let frame pass another is last 100ms gate close let frames dropped. When the ingress frames have reach total frames over 8000000 bytes, the excessive frames will be dropped in that 200000000ns time slot. > tc qdisc add dev eth0 ingress > tc filter add dev eth0 parent ffff: protocol ip \ flower src_ip 192.168.0.20 \ action gate index 2 clockid CLOCK_TAI \ sched-entry open 200000000 -1 8000000 \ sched-entry close 100000000 -1 -1 > tc chain del dev eth0 ingress chain 0 "sched-entry" follow the name taprio style. Gate state is "open"/"close". Follow with period nanosecond. Then next item is internal priority value means which ingress queue should put. "-1" means wildcard. The last value optional specifies the maximum number of MSDU octets that are permitted to pass the gate during the specified time interval. Base-time is not set will be 0 as default, as result start time would be ((N + 1) * cycletime) which is the minimal of future time. Below example shows filtering a stream with destination mac address is 10:00:80:00:00:00 and ip type is ICMP, follow the action gate. The gate action would run with one close time slot which means always keep close. The time cycle is total 200000000ns. The base-time would calculate by: 1357000000000 + (N + 1) * cycletime When the total value is the future time, it will be the start time. The cycletime here would be 200000000ns for this case. > tc filter add dev eth0 parent ffff: protocol ip \ flower skip_hw ip_proto icmp dst_mac 10:00:80:00:00:00 \ action gate index 12 base-time 1357000000000 \ sched-entry close 200000000 -1 -1 \ clockid CLOCK_TAI Signed-off-by: Po Liu Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/tc_act/tc_gate.h | 47 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 include/uapi/linux/tc_act/tc_gate.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9f06d29cab70..fc672b232437 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -134,6 +134,7 @@ enum tca_id { TCA_ID_CTINFO, TCA_ID_MPLS, TCA_ID_CT, + TCA_ID_GATE, /* other actions go here */ __TCA_ID_MAX = 255 }; diff --git a/include/uapi/linux/tc_act/tc_gate.h b/include/uapi/linux/tc_act/tc_gate.h new file mode 100644 index 000000000000..f214b3a6d44f --- /dev/null +++ b/include/uapi/linux/tc_act/tc_gate.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright 2020 NXP */ + +#ifndef __LINUX_TC_GATE_H +#define __LINUX_TC_GATE_H + +#include + +struct tc_gate { + tc_gen; +}; + +enum { + TCA_GATE_ENTRY_UNSPEC, + TCA_GATE_ENTRY_INDEX, + TCA_GATE_ENTRY_GATE, + TCA_GATE_ENTRY_INTERVAL, + TCA_GATE_ENTRY_IPV, + TCA_GATE_ENTRY_MAX_OCTETS, + __TCA_GATE_ENTRY_MAX, +}; +#define TCA_GATE_ENTRY_MAX (__TCA_GATE_ENTRY_MAX - 1) + +enum { + TCA_GATE_ONE_ENTRY_UNSPEC, + TCA_GATE_ONE_ENTRY, + __TCA_GATE_ONE_ENTRY_MAX, +}; +#define TCA_GATE_ONE_ENTRY_MAX (__TCA_GATE_ONE_ENTRY_MAX - 1) + +enum { + TCA_GATE_UNSPEC, + TCA_GATE_TM, + TCA_GATE_PARMS, + TCA_GATE_PAD, + TCA_GATE_PRIORITY, + TCA_GATE_ENTRY_LIST, + TCA_GATE_BASE_TIME, + TCA_GATE_CYCLE_TIME, + TCA_GATE_CYCLE_TIME_EXT, + TCA_GATE_FLAGS, + TCA_GATE_CLOCKID, + __TCA_GATE_MAX, +}; +#define TCA_GATE_MAX (__TCA_GATE_MAX - 1) + +#endif -- cgit v1.2.3 From d3f1cbd29fa63f1bb608603a6cd54ca7af56a68b Mon Sep 17 00:00:00 2001 From: Vincent Cheng Date: Fri, 1 May 2020 23:35:37 -0400 Subject: ptp: Add adjust_phase to ptp_clock_caps capability. Add adjust_phase to ptp_clock_caps capability to allow user to query if a PHC driver supports adjust phase with ioctl PTP_CLOCK_GETCAPS command. Signed-off-by: Vincent Cheng Reviewed-by: Richard Cochran Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 9dc9d0079e98..ff070aa64278 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -89,7 +89,9 @@ struct ptp_clock_caps { int n_pins; /* Number of input/output pins. */ /* Whether the clock supports precise system-device cross timestamps */ int cross_timestamping; - int rsv[13]; /* Reserved for future use. */ + /* Whether the clock supports adjust phase */ + int adjust_phase; + int rsv[12]; /* Reserved for future use. */ }; struct ptp_extts_request { -- cgit v1.2.3 From 712b2698e4c024b561694cbcc1abba13eb0fd9ce Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 30 Apr 2020 07:41:34 -0700 Subject: fs/stat: Define DAX statx attribute In order for users to determine if a file is currently operating in DAX state (effective DAX). Define a statx attribute value and set that attribute if the effective DAX flag is set. To go along with this we propose the following addition to the statx man page: STATX_ATTR_DAX The file is in the DAX (cpu direct access) state. DAX state attempts to minimize software cache effects for both I/O and memory mappings of this file. It requires a file system which has been configured to support DAX. DAX generally assumes all accesses are via cpu load / store instructions which can minimize overhead for small accesses, but may adversely affect cpu utilization for large transfers. File I/O is done directly to/from user-space buffers and memory mapped I/O may be performed with direct memory mappings that bypass kernel page cache. While the DAX property tends to result in data being transferred synchronously, it does not give the same guarantees of O_SYNC where data and the necessary metadata are transferred together. A DAX file may support being mapped with the MAP_SYNC flag, which enables a program to use CPU cache flush instructions to persist CPU store operations without an explicit fsync(2). See mmap(2) for more information. Reviewed-by: Dave Chinner Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Ira Weiny Signed-off-by: Darrick J. Wong --- include/uapi/linux/stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index ad80a5c885d5..e5f9d5517f6b 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -169,6 +169,7 @@ struct statx { #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ +#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ #endif /* _UAPI_LINUX_STAT_H */ -- cgit v1.2.3 From 39d010504e6b4485d7ceee167743620dd33f4417 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 May 2020 07:07:41 -0700 Subject: net_sched: sch_fq: add horizon attribute QUIC servers would like to use SO_TXTIME, without having CAP_NET_ADMIN, to efficiently pace UDP packets. As far as sch_fq is concerned, we need to add safety checks, so that a buggy application does not fill the qdisc with packets having delivery time far in the future. This patch adds a configurable horizon (default: 10 seconds), and a configurable policy when a packet is beyond the horizon at enqueue() time: - either drop the packet (default policy) - or cap its delivery time to the horizon. $ tc -s -d qd sh dev eth0 qdisc fq 8022: root refcnt 257 limit 10000p flow_limit 100p buckets 1024 orphan_mask 1023 quantum 10Kb initial_quantum 51160b low_rate_threshold 550Kbit refill_delay 40.0ms timer_slack 10.000us horizon 10.000s Sent 1234215879 bytes 837099 pkt (dropped 21, overlimits 0 requeues 6) backlog 0b 0p requeues 6 flows 1191 (inactive 1177 throttled 0) gc 0 highprio 0 throttled 692 latency 11.480us pkts_too_long 0 alloc_errors 0 horizon_drops 21 horizon_caps 0 v2: fixed an overflow on 32bit kernels in fq_init(), reported by kbuild test robot Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 0c02737c8f47..a95f3ae7ab37 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -913,6 +913,10 @@ enum { TCA_FQ_TIMER_SLACK, /* timer slack */ + TCA_FQ_HORIZON, /* time horizon in us */ + + TCA_FQ_HORIZON_DROP, /* drop packets beyond horizon, or cap their EDT */ + __TCA_FQ_MAX }; @@ -932,6 +936,8 @@ struct tc_fq_qd_stats { __u32 throttled_flows; __u32 unthrottle_latency_ns; __u64 ce_mark; /* packets above ce_threshold */ + __u64 horizon_drops; + __u64 horizon_caps; }; /* Heavy-Hitter Filter */ -- cgit v1.2.3 From f645e6256bd1b12523b759fcc610861fb21c24c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 21 Apr 2020 15:57:38 +0200 Subject: media: v4l2-dev/ioctl: Add V4L2_CAP_IO_MC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a video device capability flag to indicate that its inputs and/or outputs are controlled by the Media Controller instead of the V4L2 API. When this flag is set, ioctl for enum inputs and outputs are automatically enabled and programmed to call a helper function. Suggested-by: Hans Verkuil Signed-off-by: Helen Koike Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9817b7e2c968..b18f3f7cde31 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -487,6 +487,8 @@ struct v4l2_capability { #define V4L2_CAP_TOUCH 0x10000000 /* Is a touch device */ +#define V4L2_CAP_IO_MC 0x20000000 /* Is input/output controlled by the media controller */ + #define V4L2_CAP_DEVICE_CAPS 0x80000000 /* sets device capabilities field */ /* -- cgit v1.2.3 From e5b6b07a1b45dd9d19bec1fa1d60750b0fcf2fb0 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 24 Apr 2020 15:43:31 +0200 Subject: media: v4l2: Extend VIDIOC_ENUM_FMT to support MC-centric devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VIDIOC_ENUM_FMT ioctl enumerates all formats supported by a video node. For MC-centric devices, its behaviour has always been ill-defined, with drivers implementing one of the following behaviours: - No support for VIDIOC_ENUM_FMT at all - Enumerating all formats supported by the video node, regardless of the configuration of the pipeline - Enumerating formats supported by the video node for the active configuration of the connected subdevice The first behaviour is obviously useless for applications. The second behaviour provides the most information, but doesn't offer a way to find what formats are compatible with a given pipeline configuration. The third behaviour fixes that, but with the drawback that applications can't enumerate all supported formats anymore, and have to modify the active configuration of the pipeline to enumerate formats. The situation is messy as none of the implemented behaviours are ideal, and userspace can't predict what will happen as the behaviour is driver-specific. To fix this, let's extend the VIDIOC_ENUM_FMT with a missing capability: enumerating pixel formats for a given media bus code. The media bus code is passed through the v4l2_fmtdesc structure in a new mbus_code field (repurposed from the reserved fields). With this capability in place, applications can enumerate pixel formats for a given media bus code without modifying the active configuration of the device. The current behaviour of the ioctl is preserved when the new mbus_code field is set to 0, ensuring compatibility with existing userspace. The API extension is documented as mandatory for MC-centric devices (as advertised through the V4L2_CAP_IO_MC capability), allowing applications and compliance tools to easily determine the availability of the VIDIOC_ENUM_FMT extension. Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Niklas Söderlund Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index b18f3f7cde31..c3a1cf1c507f 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -784,7 +784,8 @@ struct v4l2_fmtdesc { __u32 flags; __u8 description[32]; /* Description string */ __u32 pixelformat; /* Format fourcc */ - __u32 reserved[4]; + __u32 mbus_code; /* Media bus code */ + __u32 reserved[3]; }; #define V4L2_FMT_FLAG_COMPRESSED 0x0001 -- cgit v1.2.3 From bdbdac7649fac05f88c9f7ab18121a17fb591687 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 5 May 2020 08:35:05 +0200 Subject: ethtool: provide UAPI for PHY master/slave configuration. This UAPI is needed for BroadR-Reach 100BASE-T1 devices. Due to lack of auto-negotiation support, we needed to be able to configure the MASTER-SLAVE role of the port manually or from an application in user space. The same UAPI can be used for 1000BASE-T or MultiGBASE-T devices to force MASTER or SLAVE role. See IEEE 802.3-2018: 22.2.4.3.7 MASTER-SLAVE control register (Register 9) 22.2.4.3.8 MASTER-SLAVE status register (Register 10) 40.5.2 MASTER-SLAVE configuration resolution 45.2.1.185.1 MASTER-SLAVE config value (1.2100.14) 45.2.7.10 MultiGBASE-T AN control 1 register (Register 7.32) The MASTER-SLAVE role affects the clock configuration: ------------------------------------------------------------------------------- When the PHY is configured as MASTER, the PMA Transmit function shall source TX_TCLK from a local clock source. When configured as SLAVE, the PMA Transmit function shall source TX_TCLK from the clock recovered from data stream provided by MASTER. iMX6Q KSZ9031 XXX ------\ /-----------\ /------------\ | | | | | MAC |<----RGMII----->| PHY Slave |<------>| PHY Master | |<--- 125 MHz ---+-<------/ | | \ | ------/ \-----------/ \------------/ ^ \-TX_TCLK ------------------------------------------------------------------------------- Since some clock or link related issues are only reproducible in a specific MASTER-SLAVE-role, MAC and PHY configuration, it is beneficial to provide generic (not 100BASE-T1 specific) interface to the user space for configuration flexibility and trouble shooting. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 16 +++++++++++++++- include/uapi/linux/ethtool_netlink.h | 2 ++ include/uapi/linux/mii.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 92f737f10117..f4662b3a9e1e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1666,6 +1666,18 @@ static inline int ethtool_validate_duplex(__u8 duplex) return 0; } +#define MASTER_SLAVE_CFG_UNSUPPORTED 0 +#define MASTER_SLAVE_CFG_UNKNOWN 1 +#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2 +#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3 +#define MASTER_SLAVE_CFG_MASTER_FORCE 4 +#define MASTER_SLAVE_CFG_SLAVE_FORCE 5 +#define MASTER_SLAVE_STATE_UNSUPPORTED 0 +#define MASTER_SLAVE_STATE_UNKNOWN 1 +#define MASTER_SLAVE_STATE_MASTER 2 +#define MASTER_SLAVE_STATE_SLAVE 3 +#define MASTER_SLAVE_STATE_ERR 4 + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 @@ -1904,7 +1916,9 @@ struct ethtool_link_settings { __u8 eth_tp_mdix_ctrl; __s8 link_mode_masks_nwords; __u8 transceiver; - __u8 reserved1[3]; + __u8 master_slave_cfg; + __u8 master_slave_state; + __u8 reserved1[1]; __u32 reserved[7]; __u32 link_mode_masks[0]; /* layout of link_mode_masks fields: diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7fde76366ba4..bf1d310e20bc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -216,6 +216,8 @@ enum { ETHTOOL_A_LINKMODES_PEER, /* bitset */ ETHTOOL_A_LINKMODES_SPEED, /* u32 */ ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ /* add new constants above here */ __ETHTOOL_A_LINKMODES_CNT, diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h index 90f9b4e1ba27..39f7c44baf53 100644 --- a/include/uapi/linux/mii.h +++ b/include/uapi/linux/mii.h @@ -151,11 +151,13 @@ /* 1000BASE-T Control register */ #define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ #define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */ +#define CTL1000_PREFER_MASTER 0x0400 /* prefer to operate as master */ #define CTL1000_AS_MASTER 0x0800 #define CTL1000_ENABLE_MASTER 0x1000 /* 1000BASE-T Status register */ #define LPA_1000MSFAIL 0x8000 /* Master/Slave resolution failure */ +#define LPA_1000MSRES 0x4000 /* Master/Slave resolution status */ #define LPA_1000LOCALRXOK 0x2000 /* Link partner local receiver status */ #define LPA_1000REMRXOK 0x1000 /* Link partner remote receiver status */ #define LPA_1000FULL 0x0800 /* Link partner 1000BASE-T full duplex */ -- cgit v1.2.3 From 8086fbaf49345f988deec539ec8e182b02914401 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 8 May 2020 10:46:11 -0700 Subject: bpf: Allow any port in bpf_bind helper We want to have a tighter control on what ports we bind to in the BPF_CGROUP_INET{4,6}_CONNECT hooks even if it means connect() becomes slightly more expensive. The expensive part comes from the fact that we now need to call inet_csk_get_port() that verifies that the port is not used and allocates an entry in the hash table for it. Since we can't rely on "snum || !bind_address_no_port" to prevent us from calling POST_BIND hook anymore, let's add another bind flag to indicate that the call site is BPF program. v5: * fix wrong AF_INET (should be AF_INET6) in the bpf program for v6 v3: * More bpf_bind documentation refinements (Martin KaFai Lau) * Add UDP tests as well (Martin KaFai Lau) * Don't start the thread, just do socket+bind+listen (Martin KaFai Lau) v2: * Update documentation (Andrey Ignatov) * Pass BIND_FORCE_ADDRESS_NO_PORT conditionally (Andrey Ignatov) Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrey Ignatov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200508174611.228805-5-sdf@google.com --- include/uapi/linux/bpf.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b3643e27e264..6e5e7caa3739 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1994,10 +1994,11 @@ union bpf_attr { * * This helper works for IPv4 and IPv6, TCP and UDP sockets. The * domain (*addr*\ **->sa_family**) must be **AF_INET** (or - * **AF_INET6**). Looking for a free port to bind to can be - * expensive, therefore binding to port is not permitted by the - * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) - * must be set to zero. + * **AF_INET6**). It's advised to pass zero port (**sin_port** + * or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like + * behavior and lets the kernel efficiently pick up an unused + * port as long as 4-tuple is unique. Passing non-zero port might + * lead to degraded performance. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3 From 15d83c4d7cef5c067a8b075ce59e97df4f60706e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:00 -0700 Subject: bpf: Allow loading of a bpf_iter program A bpf_iter program is a tracing program with attach type BPF_TRACE_ITER. The load attribute attach_btf_id is used by the verifier against a particular kernel function, which represents a target, e.g., __bpf_iter__bpf_map for target bpf_map which is implemented later. The program return value must be 0 or 1 for now. 0 : successful, except potential seq_file buffer overflow which is handled by seq_file reader. 1 : request to restart the same object In the future, other return values may be used for filtering or teminating the iterator. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175900.2474947-1-yhs@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6e5e7caa3739..c8a5325cc8d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -218,6 +218,7 @@ enum bpf_attach_type { BPF_TRACE_FEXIT, BPF_MODIFY_RETURN, BPF_LSM_MAC, + BPF_TRACE_ITER, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From de4e05cac46d206f9090051ef09930514bff73e4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:01 -0700 Subject: bpf: Support bpf tracing/iter programs for BPF_LINK_CREATE Given a bpf program, the step to create an anonymous bpf iterator is: - create a bpf_iter_link, which combines bpf program and the target. In the future, there could be more information recorded in the link. A link_fd will be returned to the user space. - create an anonymous bpf iterator with the given link_fd. The bpf_iter_link can be pinned to bpffs mount file system to create a file based bpf iterator as well. The benefit to use of bpf_iter_link: - using bpf link simplifies design and implementation as bpf link is used for other tracing bpf programs. - for file based bpf iterator, bpf_iter_link provides a standard way to replace underlying bpf programs. - for both anonymous and free based iterators, bpf link query capability can be leveraged. The patch added support of tracing/iter programs for BPF_LINK_CREATE. A new link type BPF_LINK_TYPE_ITER is added to facilitate link querying. Currently, only prog_id is needed, so there is no additional in-kernel show_fdinfo() and fill_link_info() hook is needed for BPF_LINK_TYPE_ITER link. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175901.2475084-1-yhs@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8a5325cc8d0..1e8dfff5d5d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -229,6 +229,7 @@ enum bpf_link_type { BPF_LINK_TYPE_RAW_TRACEPOINT = 1, BPF_LINK_TYPE_TRACING = 2, BPF_LINK_TYPE_CGROUP = 3, + BPF_LINK_TYPE_ITER = 4, MAX_BPF_LINK_TYPE, }; -- cgit v1.2.3 From ac51d99bf81caac8d8881fe52098948110d0de68 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:05 -0700 Subject: bpf: Create anonymous bpf iterator A new bpf command BPF_ITER_CREATE is added. The anonymous bpf iterator is seq_file based. The seq_file private data are referenced by targets. The bpf_iter infrastructure allocated additional space at seq_file->private before the space used by targets to store some meta data, e.g., prog: prog to run session_id: an unique id for each opened seq_file seq_num: how many times bpf programs are queried in this session done_stop: an internal state to decide whether bpf program should be called in seq_ops->stop() or not The seq_num will start from 0 for valid objects. The bpf program may see the same seq_num more than once if - seq_file buffer overflow happens and the same object is retried by bpf_seq_read(), or - the bpf program explicitly requests a retry of the same object Since module is not supported for bpf_iter, all target registeration happens at __init time, so there is no need to change bpf_iter_unreg_target() as it is used mostly in error path of the init function at which time no bpf iterators have been created yet. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175905.2475770-1-yhs@fb.com --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1e8dfff5d5d4..708763f702e1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -116,6 +116,7 @@ enum bpf_cmd { BPF_LINK_GET_FD_BY_ID, BPF_LINK_GET_NEXT_ID, BPF_ENABLE_STATS, + BPF_ITER_CREATE, }; enum bpf_map_type { @@ -614,6 +615,11 @@ union bpf_attr { __u32 type; } enable_stats; + struct { /* struct used by BPF_ITER_CREATE command */ + __u32 link_fd; + __u32 flags; + } iter_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 492e639f0c222784e2e0f121966375f641c61b15 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:14 -0700 Subject: bpf: Add bpf_seq_printf and bpf_seq_write helpers Two helpers bpf_seq_printf and bpf_seq_write, are added for writing data to the seq_file buffer. bpf_seq_printf supports common format string flag/width/type fields so at least I can get identical results for netlink and ipv6_route targets. For bpf_seq_printf and bpf_seq_write, return value -EOVERFLOW specifically indicates a write failure due to overflow, which means the object will be repeated in the next bpf invocation if object collection stays the same. Note that if the object collection is changed, depending how collection traversal is done, even if the object still in the collection, it may not be visited. For bpf_seq_printf, format %s, %p{i,I}{4,6} needs to read kernel memory. Reading kernel memory may fail in the following two cases: - invalid kernel address, or - valid kernel address but requiring a major fault If reading kernel memory failed, the %s string will be an empty string and %p{i,I}{4,6} will be all 0. Not returning error to bpf program is consistent with what bpf_trace_printk() does for now. bpf_seq_printf may return -EBUSY meaning that internal percpu buffer for memory copy of strings or other pointees is not available. Bpf program can return 1 to indicate it wants the same object to be repeated. Right now, this should not happen on no-RT kernels since migrate_disable(), which guards bpf prog call, calls preempt_disable(). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175914.2476661-1-yhs@fb.com --- include/uapi/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 708763f702e1..9d1932e23cec 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3077,6 +3077,41 @@ union bpf_attr { * See: clock_gettime(CLOCK_BOOTTIME) * Return * Current *ktime*. + * + * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * seq_printf uses seq_file seq_printf() to print out the format string. + * The *m* represents the seq_file. The *fmt* and *fmt_size* are for + * the format string itself. The *data* and *data_len* are format string + * arguments. The *data* are a u64 array and corresponding format string + * values are stored in the array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* array. + * The *data_len* is the *data* size in term of bytes. + * + * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. + * Reading kernel memory may fail due to either invalid address or + * valid address but requiring a major memory fault. If reading kernel memory + * fails, the string for **%s** will be an empty string, and the ip + * address for **%p{i,I}{4,6}** will be 0. Not returning error to + * bpf program is consistent with what bpf_trace_printk() does for now. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EBUSY** Percpu memory copy buffer is busy, can try again + * by returning 1 from bpf program. + * * **-EINVAL** Invalid arguments, or invalid/unsupported formats. + * * **-E2BIG** Too many format specifiers. + * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * + * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * Description + * seq_write uses seq_file seq_write() to write the data. + * The *m* represents the seq_file. The *data* and *len* represent the + * data to write in bytes. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EOVERFLOW** Overflow happens, the same object will be tried again. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3204,7 +3239,9 @@ union bpf_attr { FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ FN(sk_assign), \ - FN(ktime_get_boot_ns), + FN(ktime_get_boot_ns), \ + FN(seq_printf), \ + FN(seq_write), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 11ca3c4261cdb4e2f33e32daf6447f8185843317 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 10 May 2020 21:12:33 +0200 Subject: net: ethtool: netlink: Add support for triggering a cable test Add new ethtool netlink calls to trigger the starting of a PHY cable test. Add Kconfig'ury to ETHTOOL_NETLINK so that PHYLIB is not a module when ETHTOOL_NETLINK is builtin, which would result in kernel linking errors. v2: Remove unwanted white space change Remove ethnl_cable_test_act_ops and use doit handler Rename cable_test_set_policy cable_test_act_policy Remove ETHTOOL_MSG_CABLE_TEST_ACT_REPLY v3: Remove ETHTOOL_MSG_CABLE_TEST_ACT_REPLY from documentation Remove unused cable_test_get_policy Add Reviewed-by tags v4: Remove unwanted blank line Signed-off-by: Andrew Lunn Reviewed-by: Michal Kubecek Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index bf1d310e20bc..6bfd648c32cf 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -39,6 +39,7 @@ enum { ETHTOOL_MSG_EEE_GET, ETHTOOL_MSG_EEE_SET, ETHTOOL_MSG_TSINFO_GET, + ETHTOOL_MSG_CABLE_TEST_ACT, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -405,6 +406,17 @@ enum { ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; +/* CABLE TEST */ + +enum { + ETHTOOL_A_CABLE_TEST_UNSPEC, + ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_CNT, + ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From b28efb930ba5a7c263826fe02e13e1b6eadb5559 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 10 May 2020 21:12:34 +0200 Subject: net: ethtool: Add attributes for cable test reports Add the attributes needed to report cable test results to userspace. The reports are expected to be per twisted pair. A nested property per pair can report the result of the cable test. A nested property can also report the length of the cable to any fault. v2: Grammar fixes Change length from u16 to u32 s/DEV/HEADER/g Add status attributes Rename pairs from numbers to letters. v3: Fixed example in document Add ETHTOOL_A_CABLE_NEST_* enum Add ETHTOOL_MSG_CABLE_TEST_NTF to documentation Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Michal Kubecek Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 59 ++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 6bfd648c32cf..2881af411f76 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -75,6 +75,7 @@ enum { ETHTOOL_MSG_EEE_GET_REPLY, ETHTOOL_MSG_EEE_NTF, ETHTOOL_MSG_TSINFO_GET_REPLY, + ETHTOOL_MSG_CABLE_TEST_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -417,6 +418,64 @@ enum { ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 }; +/* CABLE TEST NOTIFY */ +enum { + ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC, + ETHTOOL_A_CABLE_RESULT_CODE_OK, + ETHTOOL_A_CABLE_RESULT_CODE_OPEN, + ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT, + ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT, +}; + +enum { + ETHTOOL_A_CABLE_PAIR_A, + ETHTOOL_A_CABLE_PAIR_B, + ETHTOOL_A_CABLE_PAIR_C, + ETHTOOL_A_CABLE_PAIR_D, +}; + +enum { + ETHTOOL_A_CABLE_RESULT_UNSPEC, + ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ + + __ETHTOOL_A_CABLE_RESULT_CNT, + ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ + + __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, + ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC, + ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED, + ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED +}; + +enum { + ETHTOOL_A_CABLE_NEST_UNSPEC, + ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + __ETHTOOL_A_CABLE_NEST_CNT, + ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + + __ETHTOOL_A_CABLE_TEST_NTF_CNT, + ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 734e5e4e268f792d514ec2313792f30ec5f6c94f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 5 May 2020 22:13:06 +0200 Subject: rtc: add new VL flag for backup switchover A new flag RTC_VL_BACKUP_SWITCH means that a backup switchover happened since last flag clear. Link: https://lore.kernel.org/r/20200505201310.255145-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/uapi/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 83bba58d47f4..fa9aff91cbf2 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -99,6 +99,7 @@ struct rtc_pll_info { #define RTC_VL_BACKUP_LOW _BITUL(1) /* Backup voltage is low */ #define RTC_VL_BACKUP_EMPTY _BITUL(2) /* Backup empty or not present */ #define RTC_VL_ACCURACY_LOW _BITUL(3) /* Voltage is low, RTC accuracy is reduced */ +#define RTC_VL_BACKUP_SWITCH _BITUL(4) /* Backup switchover happened */ #define RTC_VL_READ _IOR('p', 0x13, unsigned int) /* Voltage low detection */ #define RTC_VL_CLR _IO('p', 0x14) /* Clear voltage low information */ -- cgit v1.2.3 From ab8d78093dfa2e7820ca0c28dda9142aa771c510 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 11 May 2020 17:15:35 +0100 Subject: bpf: Minor fixes to BPF helpers documentation Minor improvements to the documentation for BPF helpers: * Fix formatting for the description of "bpf_socket" for bpf_getsockopt() and bpf_setsockopt(), thus suppressing two warnings from rst2man about "Unexpected indentation". * Fix formatting for return values for bpf_sk_assign() and seq_file helpers. * Fix and harmonise formatting, in particular for function/struct names. * Remove blank lines before "Return:" sections. * Replace tabs found in the middle of text lines. * Fix typos. * Add a note to the footer (in Python script) about "bpftool feature probe", including for listing features available to unprivileged users, and add a reference to bpftool man page. Thanks to Florian for reporting two typos (duplicated words). Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200511161536.29853-4-quentin@isovalent.com --- include/uapi/linux/bpf.h | 109 +++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 50 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9d1932e23cec..bfb31c1be219 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -675,8 +675,8 @@ union bpf_attr { * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. * - * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() - * instead. + * Generally, use **bpf_probe_read_user**\ () or + * **bpf_probe_read_kernel**\ () instead. * Return * 0 on success, or a negative error in case of failure. * @@ -684,7 +684,7 @@ union bpf_attr { * Description * Return the time elapsed since system boot, in nanoseconds. * Does not include time the system was suspended. - * See: clock_gettime(CLOCK_MONOTONIC) + * See: **clock_gettime**\ (**CLOCK_MONOTONIC**) * Return * Current *ktime*. * @@ -1543,11 +1543,11 @@ union bpf_attr { * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address - * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for + * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for * more details. * - * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() - * instead. + * Generally, use **bpf_probe_read_user_str**\ () or + * **bpf_probe_read_kernel_str**\ () instead. * Return * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative @@ -1575,7 +1575,7 @@ union bpf_attr { * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description - * Equivalent to bpf_get_socket_cookie() helper that accepts + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. @@ -1604,6 +1604,7 @@ union bpf_attr { * The option value of length *optlen* is pointed by *optval*. * * *bpf_socket* should be one of the following: + * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** * and **BPF_CGROUP_INET6_CONNECT**. @@ -1672,12 +1673,12 @@ union bpf_attr { * * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be - * one of the XDP program return codes up to XDP_TX, as chosen by - * the caller. Any higher bits in the *flags* argument must be + * one of the XDP program return codes up to **XDP_TX**, as chosen + * by the caller. Any higher bits in the *flags* argument must be * unset. * - * See also bpf_redirect(), which only supports redirecting to an - * ifindex, but doesn't require a map to do so. + * See also **bpf_redirect**\ (), which only supports redirecting + * to an ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. @@ -1785,7 +1786,7 @@ union bpf_attr { * the time running for event since last normalization. The * enabled and running times are accumulated since the perf event * open. To achieve scaling factor between two invocations of an - * eBPF program, users can can use CPU id as the key (which is + * eBPF program, users can use CPU id as the key (which is * typical for perf array usage model) to remember the previous * value and do the calculation inside the eBPF program. * Return @@ -1812,6 +1813,7 @@ union bpf_attr { * *opval* and of length *optlen*. * * *bpf_socket* should be one of the following: + * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** * and **BPF_CGROUP_INET6_CONNECT**. @@ -1833,7 +1835,7 @@ union bpf_attr { * The first argument is the context *regs* on which the kprobe * works. * - * This helper works by setting setting the PC (program counter) + * This helper works by setting the PC (program counter) * to an override function which is run in place of the original * probed function. This means the probed function is not run at * all. The replacement function just returns with the required @@ -2300,7 +2302,7 @@ union bpf_attr { * **bpf_rc_keydown**\ () again with the same values, or calling * **bpf_rc_repeat**\ (). * - * Some protocols include a toggle bit, in case the button was + * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into @@ -2646,7 +2648,6 @@ union bpf_attr { * * *th* points to the start of the TCP header, while *th_len* * contains **sizeof**\ (**struct tcphdr**). - * * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -2829,7 +2830,6 @@ union bpf_attr { * * *th* points to the start of the TCP header, while *th_len* * contains the length of the TCP header. - * * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -2912,7 +2912,7 @@ union bpf_attr { * // size, after checking its boundaries. * } * - * In comparison, using **bpf_probe_read_user()** helper here + * In comparison, using **bpf_probe_read_user**\ () helper here * instead to read the string would require to estimate the length * at compile time, and would often result in copying more memory * than necessary. @@ -2930,14 +2930,14 @@ union bpf_attr { * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* - * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. + * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. * Return - * On success, the strictly positive length of the string, including + * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description - * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock. + * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. @@ -2965,19 +2965,19 @@ union bpf_attr { * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the - * branch records (struct perf_branch_entry) associated to *ctx* - * and store it in the buffer pointed by *buf* up to size + * branch records (**struct perf_branch_entry**) associated to *ctx* + * and store it in the buffer pointed by *buf* up to size * *size* bytes. * Return * On success, number of bytes written to *buf*. On error, a * negative value. * * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to - * instead return the number of bytes required to store all the + * instead return the number of bytes required to store all the * branch entries. If this flag is set, *buf* may be NULL. * * **-EINVAL** if arguments invalid or **size** not a multiple - * of sizeof(struct perf_branch_entry). + * of **sizeof**\ (**struct perf_branch_entry**\ ). * * **-ENOENT** if architecture does not support branch records. * @@ -2985,8 +2985,8 @@ union bpf_attr { * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. - * - * On failure, the returned value is one of the following: + * Return + * 0 on success, or one of the following in case of failure: * * **-EINVAL** if dev and inum supplied don't match dev_t and inode number * with nsfs of current task, or if dev conversion to dev_t lost high bits. @@ -3025,8 +3025,8 @@ union bpf_attr { * a global identifier that can be assumed unique. If *ctx* is * NULL, then the helper returns the cookie for the initial * network namespace. The cookie itself is very similar to that - * of bpf_get_socket_cookie() helper, but for network namespaces - * instead of sockets. + * of **bpf_get_socket_cookie**\ () helper, but for network + * namespaces instead of sockets. * Return * A 8-byte long opaque number. * @@ -3061,57 +3061,66 @@ union bpf_attr { * * The *flags* argument must be zero. * Return - * 0 on success, or a negative errno in case of failure. + * 0 on success, or a negative error in case of failure: * - * * **-EINVAL** Unsupported flags specified. - * * **-ENOENT** Socket is unavailable for assignment. - * * **-ENETUNREACH** Socket is unreachable (wrong netns). - * * **-EOPNOTSUPP** Unsupported operation, for example a - * call from outside of TC ingress. - * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * **-EINVAL** if specified *flags* are not supported. + * + * **-ENOENT** if the socket is unavailable for assignment. + * + * **-ENETUNREACH** if the socket is unreachable (wrong netns). + * + * **-EOPNOTSUPP** if the operation is not supported, for example + * a call from outside of TC ingress. + * + * **-ESOCKTNOSUPPORT** if the socket type is not supported + * (reuseport). * * u64 bpf_ktime_get_boot_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. * Does include the time the system was suspended. - * See: clock_gettime(CLOCK_BOOTTIME) + * See: **clock_gettime**\ (**CLOCK_BOOTTIME**) * Return * Current *ktime*. * * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description - * seq_printf uses seq_file seq_printf() to print out the format string. + * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print + * out the format string. * The *m* represents the seq_file. The *fmt* and *fmt_size* are for * the format string itself. The *data* and *data_len* are format string - * arguments. The *data* are a u64 array and corresponding format string + * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the *data* size in term of bytes. + * The *data_len* is the size of *data* in bytes. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or * valid address but requiring a major memory fault. If reading kernel memory * fails, the string for **%s** will be an empty string, and the ip * address for **%p{i,I}{4,6}** will be 0. Not returning error to - * bpf program is consistent with what bpf_trace_printk() does for now. + * bpf program is consistent with what **bpf_trace_printk**\ () does for now. * Return - * 0 on success, or a negative errno in case of failure. + * 0 on success, or a negative error in case of failure: + * + * **-EBUSY** if per-CPU memory copy buffer is busy, can try again + * by returning 1 from bpf program. + * + * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported. + * + * **-E2BIG** if *fmt* contains too many format specifiers. * - * * **-EBUSY** Percpu memory copy buffer is busy, can try again - * by returning 1 from bpf program. - * * **-EINVAL** Invalid arguments, or invalid/unsupported formats. - * * **-E2BIG** Too many format specifiers. - * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description - * seq_write uses seq_file seq_write() to write the data. + * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the - * data to write in bytes. + * data to write in bytes. * Return - * 0 on success, or a negative errno in case of failure. + * 0 on success, or a negative error in case of failure: * - * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * **-EOVERFLOW** if an overflow happened: The same object will be tried again. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3 From 6446ec6cbf46483737e832cd6050885fa8eb87fa Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 7 May 2020 17:12:52 +0200 Subject: media: v4l2-subdev: add VIDIOC_SUBDEV_QUERYCAP ioctl While normal video/radio/vbi/swradio nodes have a proper QUERYCAP ioctl that apps can call to determine that it is indeed a V4L2 device, there is currently no equivalent for v4l-subdev nodes. Adding this ioctl will solve that, and it will allow utilities like v4l2-compliance to be used with these devices as well. SUBDEV_QUERYCAP currently returns the version and capabilities of the subdevice. Define a capability flag to report if the subdevice is registered in read-only mode. Signed-off-by: Hans Verkuil Signed-off-by: Jacopo Mondi Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-subdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 03970ce30741..5d2a1dab7911 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -155,9 +155,25 @@ struct v4l2_subdev_selection { __u32 reserved[8]; }; +/** + * struct v4l2_subdev_capability - subdev capabilities + * @version: the driver versioning number + * @capabilities: the subdev capabilities, see V4L2_SUBDEV_CAP_* + * @reserved: for future use, set to zero for now + */ +struct v4l2_subdev_capability { + __u32 version; + __u32 capabilities; + __u32 reserved[14]; +}; + +/* The v4l2 sub-device video device node is registered in read-only mode. */ +#define V4L2_SUBDEV_CAP_RO_SUBDEV BIT(0) + /* Backwards compatibility define --- to be removed */ #define v4l2_subdev_edid v4l2_edid +#define VIDIOC_SUBDEV_QUERYCAP _IOR('V', 0, struct v4l2_subdev_capability) #define VIDIOC_SUBDEV_G_FMT _IOWR('V', 4, struct v4l2_subdev_format) #define VIDIOC_SUBDEV_S_FMT _IOWR('V', 5, struct v4l2_subdev_format) #define VIDIOC_SUBDEV_G_FRAME_INTERVAL _IOWR('V', 21, struct v4l2_subdev_frame_interval) -- cgit v1.2.3 From 7d33850abdb9048c6aa421440a64905eb4ad07a2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 31 Mar 2020 11:40:33 +0200 Subject: floppy: add references to 82077's extra registers This controller provides extra status registers SRA and SRB as well as a tape drive register (TDR) and a data rate select register (DSR), which are referenced in the sparc port, so let's have their symbolic definitions centralized. Link: https://lore.kernel.org/r/20200331094054.24441-3-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov --- include/uapi/linux/fdreg.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fdreg.h b/include/uapi/linux/fdreg.h index 1318881954e1..10d33632939d 100644 --- a/include/uapi/linux/fdreg.h +++ b/include/uapi/linux/fdreg.h @@ -7,13 +7,23 @@ * Handbook", Sanches and Canton. */ -/* Fd controller regs. S&C, about page 340 */ -#define FD_STATUS 4 -#define FD_DATA 5 +/* 82077's auxiliary status registers A & B (R) */ +#define FD_SRA 0 +#define FD_SRB 1 /* Digital Output Register */ #define FD_DOR 2 +/* 82077's tape drive register (R/W) */ +#define FD_TDR 3 + +/* 82077's data rate select register (W) */ +#define FD_DSR 4 + +/* Fd controller regs. S&C, about page 340 */ +#define FD_STATUS 4 +#define FD_DATA 5 + /* Digital Input Register (read) */ #define FD_DIR 7 -- cgit v1.2.3 From 9c4c5a24c85585fb8904bd2872501cd8181b3854 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:14 +0300 Subject: floppy: add FD_AUTODETECT_SIZE define for struct floppy_drive_params Use FD_AUTODETECT_SIZE for autodetect buffer size in struct floppy_drive_params instead of a magic number. Link: https://lore.kernel.org/r/20200501134416.72248-3-efremov@linux.com Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- include/uapi/linux/fd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h index 90fb94712c41..3f6b7be4c096 100644 --- a/include/uapi/linux/fd.h +++ b/include/uapi/linux/fd.h @@ -172,7 +172,10 @@ struct floppy_drive_params { * used in succession to try to read the disk. If the FDC cannot lock onto * the disk, the next format is tried. This uses the variable 'probing'. */ - short autodetect[8]; /* autodetected formats */ + +#define FD_AUTODETECT_SIZE 8 + + short autodetect[FD_AUTODETECT_SIZE]; /* autodetected formats */ int checkfreq; /* how often should the drive be checked for disk * changes */ -- cgit v1.2.3 From bd10a5f3e21b1cb8e2133c1f08b3e8207cee12dd Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:15 +0300 Subject: floppy: add defines for sizes of cmd & reply buffers of floppy_raw_cmd Use FD_RAW_CMD_SIZE, FD_RAW_REPLY_SIZE defines instead of magic numbers for cmd & reply buffers of struct floppy_raw_cmd. Remove local to floppy.c MAX_REPLIES define, as it is now FD_RAW_REPLY_SIZE. FD_RAW_CMD_FULLSIZE added as we allow command to also fill reply_count and reply fields. Link: https://lore.kernel.org/r/20200501134416.72248-4-efremov@linux.com Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- include/uapi/linux/fd.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h index 3f6b7be4c096..2e9c2c1c18e6 100644 --- a/include/uapi/linux/fd.h +++ b/include/uapi/linux/fd.h @@ -360,10 +360,20 @@ struct floppy_raw_cmd { int buffer_length; /* length of allocated buffer */ unsigned char rate; + +#define FD_RAW_CMD_SIZE 16 +#define FD_RAW_REPLY_SIZE 16 +#define FD_RAW_CMD_FULLSIZE (FD_RAW_CMD_SIZE + 1 + FD_RAW_REPLY_SIZE) + + /* The command may take up the space initially intended for the reply + * and the reply count. Needed for long 82078 commands such as RESTORE, + * which takes 17 command bytes. + */ + unsigned char cmd_count; - unsigned char cmd[16]; + unsigned char cmd[FD_RAW_CMD_SIZE]; unsigned char reply_count; - unsigned char reply[16]; + unsigned char reply[FD_RAW_REPLY_SIZE]; int track; int resultcode; -- cgit v1.2.3 From 0836275df4db20daf040fff5d9a1da89c4c08a85 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 1 May 2020 16:44:16 +0300 Subject: floppy: suppress UBSAN warning in setup_rw_floppy() UBSAN: array-index-out-of-bounds in drivers/block/floppy.c:1521:45 index 16 is out of range for type 'unsigned char [16]' Call Trace: ... setup_rw_floppy+0x5c3/0x7f0 floppy_ready+0x2be/0x13b0 process_one_work+0x2c1/0x5d0 worker_thread+0x56/0x5e0 kthread+0x122/0x170 ret_from_fork+0x35/0x40 From include/uapi/linux/fd.h: struct floppy_raw_cmd { ... unsigned char cmd_count; unsigned char cmd[16]; unsigned char reply_count; unsigned char reply[16]; ... } This out-of-bounds access is intentional. The command in struct floppy_raw_cmd may take up the space initially intended for the reply and the reply count. It is needed for long 82078 commands such as RESTORE, which takes 17 command bytes. Initial cmd size is not enough and since struct setup_rw_floppy is a part of uapi we check that cmd_count is in [0:16+1+16] in raw_cmd_copyin(). The patch adds union with original cmd,reply_count,reply fields and fullcmd field of equivalent size. The cmd accesses are turned to fullcmd where appropriate to suppress UBSAN warning. Link: https://lore.kernel.org/r/20200501134416.72248-5-efremov@linux.com Reviewed-by: Christoph Hellwig Signed-off-by: Denis Efremov --- include/uapi/linux/fd.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h index 2e9c2c1c18e6..8b80c63b971c 100644 --- a/include/uapi/linux/fd.h +++ b/include/uapi/linux/fd.h @@ -371,9 +371,14 @@ struct floppy_raw_cmd { */ unsigned char cmd_count; - unsigned char cmd[FD_RAW_CMD_SIZE]; - unsigned char reply_count; - unsigned char reply[FD_RAW_REPLY_SIZE]; + union { + struct { + unsigned char cmd[FD_RAW_CMD_SIZE]; + unsigned char reply_count; + unsigned char reply[FD_RAW_REPLY_SIZE]; + }; + unsigned char fullcmd[FD_RAW_CMD_FULLSIZE]; + }; int track; int resultcode; -- cgit v1.2.3 From 581701b7efd60ba13d8a7eed60cbdd7fefaf6696 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 May 2020 16:44:24 +0200 Subject: uapi: deprecate STATX_ALL Constants of the *_ALL type can be actively harmful due to the fact that developers will usually fail to consider the possible effects of future changes to the definition. Deprecate STATX_ALL in the uapi, while no damage has been done yet. We could keep something like this around in the kernel, but there's actually no point, since all filesystems should be explicitly checking flags that they support and not rely on the VFS masking unknown ones out: a flag could be known to the VFS, yet not known to the filesystem. Cc: David Howells Cc: linux-api@vger.kernel.org Cc: linux-man@vger.kernel.org Signed-off-by: Miklos Szeredi Reviewed-by: Christoph Hellwig --- include/uapi/linux/stat.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index ad80a5c885d5..d1192783139a 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -148,9 +148,18 @@ struct statx { #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ -#define STATX_ALL 0x00000fffU /* All currently supported flags */ + #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ +#ifndef __KERNEL__ +/* + * This is deprecated, and shall remain the same value in the future. To avoid + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) + * instead. + */ +#define STATX_ALL 0x00000fffU +#endif + /* * Attributes to be found in stx_attributes and masked in stx_attributes_mask. * -- cgit v1.2.3 From fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 May 2020 16:44:24 +0200 Subject: statx: add mount ID Systemd is hacking around to get it and it's trivial to add to statx, so... Cc: linux-api@vger.kernel.org Cc: linux-man@vger.kernel.org Signed-off-by: Miklos Szeredi Reviewed-by: Christoph Hellwig --- include/uapi/linux/stat.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index d1192783139a..d81456247f10 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -123,7 +123,10 @@ struct statx { __u32 stx_dev_major; /* ID of device containing file [uncond] */ __u32 stx_dev_minor; /* 0x90 */ - __u64 __spare2[14]; /* Spare space for future expansion */ + __u64 stx_mnt_id; + __u64 __spare2; + /* 0xa0 */ + __u64 __spare3[12]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -148,6 +151,7 @@ struct statx { #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ -- cgit v1.2.3 From 80340fe3605c0e78cfe496c3b3878be828cfdbfe Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 May 2020 16:44:24 +0200 Subject: statx: add mount_root Determining whether a path or file descriptor refers to a mountpoint (or more precisely a mount root) is not trivial using current tools. Add a flag to statx that indicates whether the path or fd refers to the root of a mount or not. Cc: linux-api@vger.kernel.org Cc: linux-man@vger.kernel.org Reported-by: Lennart Poettering Reported-by: J. Bruce Fields Signed-off-by: Miklos Szeredi Reviewed-by: Christoph Hellwig --- include/uapi/linux/stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index d81456247f10..6df9348bb277 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -181,6 +181,7 @@ struct statx { #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ +#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -- cgit v1.2.3 From c8ffd8bcdd28296a198f237cc595148a8d4adfbe Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 May 2020 16:44:25 +0200 Subject: vfs: add faccessat2 syscall POSIX defines faccessat() as having a fourth "flags" argument, while the linux syscall doesn't have it. Glibc tries to emulate AT_EACCESS and AT_SYMLINK_NOFOLLOW, but AT_EACCESS emulation is broken. Add a new faccessat(2) syscall with the added flags argument and implement both flags. The value of AT_EACCESS is defined in glibc headers to be the same as AT_REMOVEDIR. Use this value for the kernel interface as well, together with the explanatory comment. Also add AT_EMPTY_PATH support, which is not documented by POSIX, but can be useful and is trivial to implement. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fcntl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ca88b7bce553..2f86b2ad6d7e 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -84,10 +84,20 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is + * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to + * unlinkat. The two functions do completely different things and therefore, + * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to + * faccessat would be undefined behavior and thus treating it equivalent to + * AT_EACCESS is valid undefined behavior. + */ #define AT_FDCWD -100 /* Special value used to indicate openat should use the current working directory. */ #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_EACCESS 0x200 /* Test access permitted for + effective IDs, not real IDs. */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ -- cgit v1.2.3 From 7aebfa1b3885b5aa29fcb4a596d0485ac463bbe8 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 13 May 2020 18:50:27 -0700 Subject: bpf: Support narrow loads from bpf_sock_addr.user_port bpf_sock_addr.user_port supports only 4-byte load and it leads to ugly code in BPF programs, like: volatile __u32 user_port = ctx->user_port; __u16 port = bpf_ntohs(user_port); Since otherwise clang may optimize the load to be 2-byte and it's rejected by verifier. Add support for 1- and 2-byte loads same way as it's supported for other fields in bpf_sock_addr like user_ip4, msg_src_ip4, etc. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/c1e983f4c17573032601d0b2b1f9d1274f24bc16.1589420814.git.rdna@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bfb31c1be219..85cfdffde182 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3728,7 +3728,7 @@ struct bpf_sock_addr { __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ - __u32 user_port; /* Allows 4-byte read and write. + __u32 user_port; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order */ __u32 family; /* Allows 4-byte read, but no write */ -- cgit v1.2.3 From f307fa2cb4c935f7f1ff0aeb880c7b44fb9a642b Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 14 May 2020 13:03:47 -0700 Subject: bpf: Introduce bpf_sk_{, ancestor_}cgroup_id helpers With having ability to lookup sockets in cgroup skb programs it becomes useful to access cgroup id of retrieved sockets so that policies can be implemented based on origin cgroup of such socket. For example, a container running in a cgroup can have cgroup skb ingress program that can lookup peer socket that is sending packets to a process inside the container and decide whether those packets should be allowed or denied based on cgroup id of the peer. More specifically such ingress program can implement intra-host policy "allow incoming packets only from this same container and not from any other container on same host" w/o relying on source IP addresses since quite often it can be the case that containers share same IP address on the host. Introduce two new helpers for this use-case: bpf_sk_cgroup_id() and bpf_sk_ancestor_cgroup_id(). These helpers are similar to existing bpf_skb_{,ancestor_}cgroup_id helpers with the only difference that sk is used to get cgroup id instead of skb, and share code with them. See documentation in UAPI for more details. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/f5884981249ce911f63e9b57ecd5d7d19154ff39.1589486450.git.rdna@fb.com --- include/uapi/linux/bpf.h | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 85cfdffde182..146c742f1d49 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3121,6 +3121,38 @@ union bpf_attr { * 0 on success, or a negative error in case of failure: * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. + * + * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) + * Description + * Return the cgroup v2 id of the socket *sk*. + * + * *sk* must be a non-**NULL** pointer to a full socket, e.g. one + * returned from **bpf_sk_lookup_xxx**\ (), + * **bpf_sk_fullsock**\ (), etc. The format of returned id is + * same as in **bpf_skb_cgroup_id**\ (). + * + * This helper is available only if the kernel was compiled with + * the **CONFIG_SOCK_CGROUP_DATA** configuration option. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *sk* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *sk*, then return value will be same as that + * of **bpf_sk_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *sk*. + * + * The format of returned id and helper limitations are same as in + * **bpf_sk_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3250,7 +3282,9 @@ union bpf_attr { FN(sk_assign), \ FN(ktime_get_boot_ns), \ FN(seq_printf), \ - FN(seq_write), + FN(seq_write), \ + FN(sk_cgroup_id), \ + FN(sk_ancestor_cgroup_id), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From c8741e2bfe872425ea6f10bb6f7dc1d67bc60c3a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 14 May 2020 12:51:25 +0200 Subject: xdp: Allow bpf_xdp_adjust_tail() to grow packet size Finally, after all drivers have a frame size, allow BPF-helper bpf_xdp_adjust_tail() to grow or extend packet size at frame tail. Remember that helper/macro xdp_data_hard_end have reserved some tailroom. Thus, this helper makes sure that the BPF-prog don't have access to this tailroom area. V2: Remove one chicken check and use WARN_ONCE for other Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/158945348530.97035.12577148209134239291.stgit@firesoul --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 32cbf36c7729..b9b8a0f63b91 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2015,8 +2015,8 @@ union bpf_attr { * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is - * only possible to shrink the packet as of this writing, - * therefore *delta* must be a negative integer. + * possible to both shrink and grow the packet tail. + * Shrink done via *delta* being a negative integer. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers -- cgit v1.2.3 From a17b53c4a4b55ec322c132b6670743612229ee9c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 13 May 2020 16:03:53 -0700 Subject: bpf, capability: Introduce CAP_BPF Split BPF operations that are allowed under CAP_SYS_ADMIN into combination of CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN. For backward compatibility include them in CAP_SYS_ADMIN as well. The end result provides simple safety model for applications that use BPF: - to load tracing program types BPF_PROG_TYPE_{KPROBE, TRACEPOINT, PERF_EVENT, RAW_TRACEPOINT, etc} use CAP_BPF and CAP_PERFMON - to load networking program types BPF_PROG_TYPE_{SCHED_CLS, XDP, SK_SKB, etc} use CAP_BPF and CAP_NET_ADMIN There are few exceptions from this rule: - bpf_trace_printk() is allowed in networking programs, but it's using tracing mechanism, hence this helper needs additional CAP_PERFMON if networking program is using this helper. - BPF_F_ZERO_SEED flag for hash/lru map is allowed under CAP_SYS_ADMIN only to discourage production use. - BPF HW offload is allowed under CAP_SYS_ADMIN. - bpf_probe_write_user() is allowed under CAP_SYS_ADMIN only. CAPs are not checked at attach/detach time with two exceptions: - loading BPF_PROG_TYPE_CGROUP_SKB is allowed for unprivileged users, hence CAP_NET_ADMIN is required at attach time. - flow_dissector detach doesn't check prog FD at detach, hence CAP_NET_ADMIN is required at detach time. CAP_SYS_ADMIN is required to iterate BPF objects (progs, maps, links) via get_next_id command and convert them to file descriptor via GET_FD_BY_ID command. This restriction guarantees that mutliple tasks with CAP_BPF are not able to affect each other. That leads to clean isolation of tasks. For example: task A with CAP_BPF and CAP_NET_ADMIN loads and attaches a firewall via bpf_link. task B with the same capabilities cannot detach that firewall unless task A explicitly passed link FD to task B via scm_rights or bpffs. CAP_SYS_ADMIN can still detach/unload everything. Two networking user apps with CAP_SYS_ADMIN and CAP_NET_ADMIN can accidentely mess with each other programs and maps. Two networking user apps with CAP_NET_ADMIN and CAP_BPF cannot affect each other. CAP_NET_ADMIN + CAP_BPF allows networking programs access only packet data. Such networking progs cannot access arbitrary kernel memory or leak pointers. bpftool, bpftrace, bcc tools binaries should NOT be installed with CAP_BPF and CAP_PERFMON, since unpriv users will be able to read kernel secrets. But users with these two permissions will be able to use these tracing tools. CAP_PERFMON is least secure, since it allows kprobes and kernel memory access. CAP_NET_ADMIN can stop network traffic via iproute2. CAP_BPF is the safest from security point of view and harmless on its own. Having CAP_BPF and/or CAP_NET_ADMIN is not enough to write into arbitrary map and if that map is used by firewall-like bpf prog. CAP_BPF allows many bpf prog_load commands in parallel. The verifier may consume large amount of memory and significantly slow down the system. Existing unprivileged BPF operations are not affected. In particular unprivileged users are allowed to load socket_filter and cg_skb program types and to create array, hash, prog_array, map-in-map map types. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200513230355.7858-2-alexei.starovoitov@gmail.com --- include/uapi/linux/capability.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index e58c9636741b..c7372180a0a9 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -274,6 +274,7 @@ struct vfs_ns_cap_data { arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ /* Allow setting zone reclaim policy */ +/* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */ #define CAP_SYS_ADMIN 21 @@ -374,7 +375,38 @@ struct vfs_ns_cap_data { #define CAP_PERFMON 38 -#define CAP_LAST_CAP CAP_PERFMON +/* + * CAP_BPF allows the following BPF operations: + * - Creating all types of BPF maps + * - Advanced verifier features + * - Indirect variable access + * - Bounded loops + * - BPF to BPF function calls + * - Scalar precision tracking + * - Larger complexity limits + * - Dead code elimination + * - And potentially other features + * - Loading BPF Type Format (BTF) data + * - Retrieve xlated and JITed code of BPF programs + * - Use bpf_spin_lock() helper + * + * CAP_PERFMON relaxes the verifier checks further: + * - BPF progs can use of pointer-to-integer conversions + * - speculation attack hardening measures are bypassed + * - bpf_probe_read to read arbitrary kernel memory is allowed + * - bpf_trace_printk to print kernel memory is allowed + * + * CAP_SYS_ADMIN is required to use bpf_probe_write_user. + * + * CAP_SYS_ADMIN is required to iterate system wide loaded + * programs, maps, links, BTFs and convert their IDs to file descriptors. + * + * CAP_PERFMON and CAP_BPF are required to load tracing programs. + * CAP_NET_ADMIN and CAP_BPF are required to load networking programs. + */ +#define CAP_BPF 39 + +#define CAP_LAST_CAP CAP_BPF #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) -- cgit v1.2.3 From f8ab1807a9c9aa14478920e64d1c9d3685aae26f Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 15 May 2020 14:40:11 +0300 Subject: net: sched: introduce terse dump flag Add new TCA_DUMP_FLAGS attribute and use it in cls API to request terse filter output from classifiers with TCA_DUMP_FLAGS_TERSE flag. This option is intended to be used to improve performance of TC filter dump when userland only needs to obtain stats and not the whole classifier/action data. Extend struct tcf_proto_ops with new terse_dump() callback that must be defined by supporting classifier implementations. Support of the options in specific classifiers and actions is implemented in following patches in the series. Signed-off-by: Vlad Buslov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 4a8c5b745157..073e71ef6bdd 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -609,11 +609,17 @@ enum { TCA_HW_OFFLOAD, TCA_INGRESS_BLOCK, TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, __TCA_MAX }; #define TCA_MAX (__TCA_MAX - 1) +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) -- cgit v1.2.3 From 0d9b5b3af134cddfdc1dd31d41946a0ad389bbf2 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 15 May 2020 18:38:04 +0200 Subject: io_uring: add 'cq_flags' field for the CQ ring This patch adds the new 'cq_flags' field that should be written by the application and read by the kernel. This new field is available to the userspace application through 'cq_off.flags'. We are using 4-bytes previously reserved and set to zero. This means that if the application finds this field to zero, then the new functionality is not supported. In the next patch we will introduce the first flag available. Signed-off-by: Stefano Garzarella Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e48d746b8e2a..602bb0ece607 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -204,7 +204,9 @@ struct io_cqring_offsets { __u32 ring_entries; __u32 overflow; __u32 cqes; - __u64 resv[2]; + __u32 flags; + __u32 resv1; + __u64 resv2; }; /* -- cgit v1.2.3 From 7e55a19cf6e70ce08964b46dbbfbdb07fbc995fc Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 15 May 2020 18:38:05 +0200 Subject: io_uring: add IORING_CQ_EVENTFD_DISABLED to the CQ ring flags This new flag should be set/clear from the application to disable/enable eventfd notifications when a request is completed and queued to the CQ ring. Before this patch, notifications were always sent if an eventfd is registered, so IORING_CQ_EVENTFD_DISABLED is not set during the initialization. It will be up to the application to set the flag after initialization if no notifications are required at the beginning. Signed-off-by: Stefano Garzarella Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 602bb0ece607..8c5775df08b8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -209,6 +209,13 @@ struct io_cqring_offsets { __u64 resv2; }; +/* + * cq_ring->flags + */ + +/* disable eventfd notifications */ +#define IORING_CQ_EVENTFD_DISABLED (1U << 0) + /* * io_uring_enter(2) flags */ -- cgit v1.2.3 From f2a8d5c7a218b9c24befb756c4eb30aa550ce822 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 17 May 2020 14:18:06 +0300 Subject: io_uring: add tee(2) support Add IORING_OP_TEE implementing tee(2) support. Almost identical to splice bits, but without offsets. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8c5775df08b8..92c22699a5a7 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -129,6 +129,7 @@ enum { IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, + IORING_OP_TEE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 926645d43fd43622a2b056471a2cf41cc19cbf4c Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Sat, 9 May 2020 11:04:48 +0200 Subject: media: v4l2-ctrls: Add camera orientation and rotation Add support for the newly defined V4L2_CID_CAMERA_ORIENTATION and V4L2_CID_CAMERA_SENSOR_ROTATION read-only controls used to report the camera device mounting position and orientation respectively. Reviewed-by: Laurent Pinchart Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 0ba1005c9651..62271418c1be 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -923,6 +923,13 @@ enum v4l2_auto_focus_range { #define V4L2_CID_PAN_SPEED (V4L2_CID_CAMERA_CLASS_BASE+32) #define V4L2_CID_TILT_SPEED (V4L2_CID_CAMERA_CLASS_BASE+33) +#define V4L2_CID_CAMERA_ORIENTATION (V4L2_CID_CAMERA_CLASS_BASE+34) +#define V4L2_CAMERA_ORIENTATION_FRONT 0 +#define V4L2_CAMERA_ORIENTATION_BACK 1 +#define V4L2_CAMERA_ORIENTATION_EXTERNAL 2 + +#define V4L2_CID_CAMERA_SENSOR_ROTATION (V4L2_CID_CAMERA_CLASS_BASE+35) + /* FM Modulator class control IDs */ #define V4L2_CID_FM_TX_CLASS_BASE (V4L2_CTRL_CLASS_FM_TX | 0x900) -- cgit v1.2.3 From b0d1f8741b812352fe0e5f3b2381427085f23e19 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:46 +0800 Subject: iommu/vt-d: Add nested translation helper function Nested translation mode is supported in VT-d 3.0 Spec.CH 3.8. With PASID granular translation type set to 0x11b, translation result from the first level(FL) also subject to a second level(SL) page table translation. This mode is used for SVA virtualization, where FL performs guest virtual to guest physical translation and SL performs guest physical to host physical translation. This patch adds a helper function for setting up nested translation where second level comes from a domain and first level comes from a guest PGD. Signed-off-by: Jacob Pan Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200516062101.29541-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 4ad3496e5c43..e907b7091a46 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -285,6 +285,11 @@ struct iommu_gpasid_bind_data_vtd { __u32 emt; }; +#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ + IOMMU_SVA_VTD_GPASID_EMTE | \ + IOMMU_SVA_VTD_GPASID_PCD | \ + IOMMU_SVA_VTD_GPASID_PWT) + /** * struct iommu_gpasid_bind_data - Information about device and guest PASID binding * @version: Version of this data structure -- cgit v1.2.3 From 0858caa419e6cf9d31e734d09d70b34f64443ef6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2020 13:58:35 +0000 Subject: uapi: General notification queue definitions Add UAPI definitions for the general notification queue, including the following pieces: (*) struct watch_notification. This is the metadata header for notification messages. It includes a type and subtype that indicate the source of the message (eg. WATCH_TYPE_MOUNT_NOTIFY) and the kind of the message (eg. NOTIFY_MOUNT_NEW_MOUNT). The header also contains an information field that conveys the following information: - WATCH_INFO_LENGTH. The size of the entry (entries are variable length). - WATCH_INFO_ID. The watch ID specified when the watchpoint was set. - WATCH_INFO_TYPE_INFO. (Sub)type-specific information. - WATCH_INFO_FLAG_*. Flag bits overlain on the type-specific information. For use by the type. All the information in the header can be used in filtering messages at the point of writing into the buffer. (*) struct watch_notification_removal This is an extended watch-removal notification record that includes an 'id' field that can indicate the identifier of the object being removed if available (for instance, a keyring serial number). Signed-off-by: David Howells --- include/uapi/linux/watch_queue.h | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/uapi/linux/watch_queue.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h new file mode 100644 index 000000000000..5f3d21e8a34b --- /dev/null +++ b/include/uapi/linux/watch_queue.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_WATCH_QUEUE_H +#define _UAPI_LINUX_WATCH_QUEUE_H + +#include + +enum watch_notification_type { + WATCH_TYPE_META = 0, /* Special record */ + WATCH_TYPE__NR = 1 +}; + +enum watch_meta_notification_subtype { + WATCH_META_REMOVAL_NOTIFICATION = 0, /* Watched object was removed */ + WATCH_META_LOSS_NOTIFICATION = 1, /* Data loss occurred */ +}; + +/* + * Notification record header. This is aligned to 64-bits so that subclasses + * can contain __u64 fields. + */ +struct watch_notification { + __u32 type:24; /* enum watch_notification_type */ + __u32 subtype:8; /* Type-specific subtype (filterable) */ + __u32 info; +#define WATCH_INFO_LENGTH 0x0000007f /* Length of record */ +#define WATCH_INFO_LENGTH__SHIFT 0 +#define WATCH_INFO_ID 0x0000ff00 /* ID of watchpoint */ +#define WATCH_INFO_ID__SHIFT 8 +#define WATCH_INFO_TYPE_INFO 0xffff0000 /* Type-specific info */ +#define WATCH_INFO_TYPE_INFO__SHIFT 16 +#define WATCH_INFO_FLAG_0 0x00010000 /* Type-specific info, flag bit 0 */ +#define WATCH_INFO_FLAG_1 0x00020000 /* ... */ +#define WATCH_INFO_FLAG_2 0x00040000 +#define WATCH_INFO_FLAG_3 0x00080000 +#define WATCH_INFO_FLAG_4 0x00100000 +#define WATCH_INFO_FLAG_5 0x00200000 +#define WATCH_INFO_FLAG_6 0x00400000 +#define WATCH_INFO_FLAG_7 0x00800000 +}; + + +/* + * Extended watch removal notification. This is used optionally if the type + * wants to indicate an identifier for the object being watched, if there is + * such. This can be distinguished by the length. + * + * type -> WATCH_TYPE_META + * subtype -> WATCH_META_REMOVAL_NOTIFICATION + */ +struct watch_notification_removal { + struct watch_notification watch; + __u64 id; /* Type-dependent identifier */ +}; + +#endif /* _UAPI_LINUX_WATCH_QUEUE_H */ -- cgit v1.2.3 From b580b93664f91db8cb503429030df0f1c1e53528 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2020 13:58:35 +0000 Subject: pipe: Add O_NOTIFICATION_PIPE Add an O_NOTIFICATION_PIPE flag that can be passed to pipe2() to indicate that the pipe being created is going to be used for notifications. This suppresses the use of splice(), vmsplice(), tee() and sendfile() on the pipe as calling iov_iter_revert() on a pipe when a kernel notification message has been inserted into the middle of a multi-buffer splice will be messy. The flag is given the same value as O_EXCL as it seems unlikely that this flag will ever be applicable to pipes and I don't want to use up another O_* bit unnecessarily. An alternative could be to add a pipe3() system call. Signed-off-by: David Howells --- include/uapi/linux/watch_queue.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 5f3d21e8a34b..9df72227f515 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -3,6 +3,9 @@ #define _UAPI_LINUX_WATCH_QUEUE_H #include +#include + +#define O_NOTIFICATION_PIPE O_EXCL /* Parameter to pipe2() selecting notification pipe */ enum watch_notification_type { WATCH_TYPE_META = 0, /* Special record */ -- cgit v1.2.3 From c73be61cede5882f9605a852414db559c0ebedfd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:11 +0000 Subject: pipe: Add general notification queue support Make it possible to have a general notification queue built on top of a standard pipe. Notifications are 'spliced' into the pipe and then read out. splice(), vmsplice() and sendfile() are forbidden on pipes used for notifications as post_one_notification() cannot take pipe->mutex. This means that notifications could be posted in between individual pipe buffers, making iov_iter_revert() difficult to effect. The way the notification queue is used is: (1) An application opens a pipe with a special flag and indicates the number of messages it wishes to be able to queue at once (this can only be set once): pipe2(fds, O_NOTIFICATION_PIPE); ioctl(fds[0], IOC_WATCH_QUEUE_SET_SIZE, queue_depth); (2) The application then uses poll() and read() as normal to extract data from the pipe. read() will return multiple notifications if the buffer is big enough, but it will not split a notification across buffers - rather it will return a short read or EMSGSIZE. Notification messages include a length in the header so that the caller can split them up. Each message has a header that describes it: struct watch_notification { __u32 type:24; __u32 subtype:8; __u32 info; }; The type indicates the source (eg. mount tree changes, superblock events, keyring changes, block layer events) and the subtype indicates the event type (eg. mount, unmount; EIO, EDQUOT; link, unlink). The info field indicates a number of things, including the entry length, an ID assigned to a watchpoint contributing to this buffer and type-specific flags. Supplementary data, such as the key ID that generated an event, can be attached in additional slots. The maximum message size is 127 bytes. Messages may not be padded or aligned, so there is no guarantee, for example, that the notification type will be on a 4-byte bounary. Signed-off-by: David Howells --- include/uapi/linux/watch_queue.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 9df72227f515..3a5790f1f05d 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -4,9 +4,13 @@ #include #include +#include #define O_NOTIFICATION_PIPE O_EXCL /* Parameter to pipe2() selecting notification pipe */ +#define IOC_WATCH_QUEUE_SET_SIZE _IO('W', 0x60) /* Set the size in pages */ +#define IOC_WATCH_QUEUE_SET_FILTER _IO('W', 0x61) /* Set the filter */ + enum watch_notification_type { WATCH_TYPE_META = 0, /* Special record */ WATCH_TYPE__NR = 1 @@ -41,6 +45,22 @@ struct watch_notification { #define WATCH_INFO_FLAG_7 0x00800000 }; +/* + * Notification filtering rules (IOC_WATCH_QUEUE_SET_FILTER). + */ +struct watch_notification_type_filter { + __u32 type; /* Type to apply filter to */ + __u32 info_filter; /* Filter on watch_notification::info */ + __u32 info_mask; /* Mask of relevant bits in info_filter */ + __u32 subtype_filter[8]; /* Bitmask of subtypes to filter on */ +}; + +struct watch_notification_filter { + __u32 nr_filters; /* Number of filters */ + __u32 __reserved; /* Must be 0 */ + struct watch_notification_type_filter filters[]; +}; + /* * Extended watch removal notification. This is used optionally if the type -- cgit v1.2.3 From f7e47677e39a03057dcced2016c92a9c868693ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:11 +0000 Subject: watch_queue: Add a key/keyring notification facility Add a key/keyring change notification facility whereby notifications about changes in key and keyring content and attributes can be received. Firstly, an event queue needs to be created: pipe2(fds, O_NOTIFICATION_PIPE); ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256); then a notification can be set up to report notifications via that queue: struct watch_notification_filter filter = { .nr_filters = 1, .filters = { [0] = { .type = WATCH_TYPE_KEY_NOTIFY, .subtype_filter[0] = UINT_MAX, }, }, }; ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter); keyctl_watch_key(KEY_SPEC_SESSION_KEYRING, fds[1], 0x01); After that, records will be placed into the queue when events occur in which keys are changed in some way. Records are of the following format: struct key_notification { struct watch_notification watch; __u32 key_id; __u32 aux; } *n; Where: n->watch.type will be WATCH_TYPE_KEY_NOTIFY. n->watch.subtype will indicate the type of event, such as NOTIFY_KEY_REVOKED. n->watch.info & WATCH_INFO_LENGTH will indicate the length of the record. n->watch.info & WATCH_INFO_ID will be the second argument to keyctl_watch_key(), shifted. n->key will be the ID of the affected key. n->aux will hold subtype-dependent information, such as the key being linked into the keyring specified by n->key in the case of NOTIFY_KEY_LINKED. Note that it is permissible for event records to be of variable length - or, at least, the length may be dependent on the subtype. Note also that the queue can be shared between multiple notifications of various types. Signed-off-by: David Howells Reviewed-by: James Morris --- include/uapi/linux/keyctl.h | 2 ++ include/uapi/linux/watch_queue.h | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index ed3d5893830d..4c8884eea808 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -69,6 +69,7 @@ #define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */ #define KEYCTL_MOVE 30 /* Move keys between keyrings */ #define KEYCTL_CAPABILITIES 31 /* Find capabilities of keyrings subsystem */ +#define KEYCTL_WATCH_KEY 32 /* Watch a key or ring of keys for changes */ /* keyctl structures */ struct keyctl_dh_params { @@ -130,5 +131,6 @@ struct keyctl_pkey_params { #define KEYCTL_CAPS0_MOVE 0x80 /* KEYCTL_MOVE supported */ #define KEYCTL_CAPS1_NS_KEYRING_NAME 0x01 /* Keyring names are per-user_namespace */ #define KEYCTL_CAPS1_NS_KEY_TAG 0x02 /* Key indexing can include a namespace tag */ +#define KEYCTL_CAPS1_NOTIFICATIONS 0x04 /* Keys generate watchable notifications */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 3a5790f1f05d..c3d8320b5d3a 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -13,7 +13,8 @@ enum watch_notification_type { WATCH_TYPE_META = 0, /* Special record */ - WATCH_TYPE__NR = 1 + WATCH_TYPE_KEY_NOTIFY = 1, /* Key change event notification */ + WATCH_TYPE__NR = 2 }; enum watch_meta_notification_subtype { @@ -75,4 +76,29 @@ struct watch_notification_removal { __u64 id; /* Type-dependent identifier */ }; +/* + * Type of key/keyring change notification. + */ +enum key_notification_subtype { + NOTIFY_KEY_INSTANTIATED = 0, /* Key was instantiated (aux is error code) */ + NOTIFY_KEY_UPDATED = 1, /* Key was updated */ + NOTIFY_KEY_LINKED = 2, /* Key (aux) was added to watched keyring */ + NOTIFY_KEY_UNLINKED = 3, /* Key (aux) was removed from watched keyring */ + NOTIFY_KEY_CLEARED = 4, /* Keyring was cleared */ + NOTIFY_KEY_REVOKED = 5, /* Key was revoked */ + NOTIFY_KEY_INVALIDATED = 6, /* Key was invalidated */ + NOTIFY_KEY_SETATTR = 7, /* Key's attributes got changed */ +}; + +/* + * Key/keyring notification record. + * - watch.type = WATCH_TYPE_KEY_NOTIFY + * - watch.subtype = enum key_notification_type + */ +struct key_notification { + struct watch_notification watch; + __u32 key_id; /* The key/keyring affected */ + __u32 aux; /* Per-type auxiliary data */ +}; + #endif /* _UAPI_LINUX_WATCH_QUEUE_H */ -- cgit v1.2.3 From e3b1078bedd323df343894a27eb3b3c34944dfd1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 15 May 2020 13:41:41 -0700 Subject: fscrypt: add support for IV_INO_LBLK_32 policies The eMMC inline crypto standard will only specify 32 DUN bits (a.k.a. IV bits), unlike UFS's 64. IV_INO_LBLK_64 is therefore not applicable, but an encryption format which uses one key per policy and permits the moving of encrypted file contents (as f2fs's garbage collector requires) is still desirable. To support such hardware, add a new encryption format IV_INO_LBLK_32 that makes the best use of the 32 bits: the IV is set to 'SipHash-2-4(inode_number) + file_logical_block_number mod 2^32', where the SipHash key is derived from the fscrypt master key. We hash only the inode number and not also the block number, because we need to maintain contiguity of DUNs to merge bios. Unlike with IV_INO_LBLK_64, with this format IV reuse is possible; this is unavoidable given the size of the DUN. This means this format should only be used where the requirements of the first paragraph apply. However, the hash spreads out the IVs in the whole usable range, and the use of a keyed hash makes it difficult for an attacker to determine which files use which IVs. Besides the above differences, this flag works like IV_INO_LBLK_64 in that on ext4 it is only allowed if the stable_inodes feature has been enabled to prevent inode numbers and the filesystem UUID from changing. Link: https://lore.kernel.org/r/20200515204141.251098-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Reviewed-by: Paul Crowley Signed-off-by: Eric Biggers --- include/uapi/linux/fscrypt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index a10e3cdc2839..7875709ccfeb 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -19,7 +19,8 @@ #define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 -#define FSCRYPT_POLICY_FLAGS_VALID 0x0F +#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 +#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 -- cgit v1.2.3 From 1b66d253610c7f8f257103808a9460223a087469 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 May 2020 00:45:45 +0200 Subject: bpf: Add get{peer, sock}name attach types for sock_addr As stated in 983695fa6765 ("bpf: fix unconnected udp hooks"), the objective for the existing cgroup connect/sendmsg/recvmsg/bind BPF hooks is to be transparent to applications. In Cilium we make use of these hooks [0] in order to enable E-W load balancing for existing Kubernetes service types for all Cilium managed nodes in the cluster. Those backends can be local or remote. The main advantage of this approach is that it operates as close as possible to the socket, and therefore allows to avoid packet-based NAT given in connect/sendmsg/recvmsg hooks we only need to xlate sock addresses. This also allows to expose NodePort services on loopback addresses in the host namespace, for example. As another advantage, this also efficiently blocks bind requests for applications in the host namespace for exposed ports. However, one missing item is that we also need to perform reverse xlation for inet{,6}_getname() hooks such that we can return the service IP/port tuple back to the application instead of the remote peer address. The vast majority of applications does not bother about getpeername(), but in a few occasions we've seen breakage when validating the peer's address since it returns unexpectedly the backend tuple instead of the service one. Therefore, this trivial patch allows to customise and adds a getpeername() as well as getsockname() BPF cgroup hook for both IPv4 and IPv6 in order to address this situation. Simple example: # ./cilium/cilium service list ID Frontend Service Type Backend 1 1.2.3.4:80 ClusterIP 1 => 10.0.0.10:80 Before; curl's verbose output example, no getpeername() reverse xlation: # curl --verbose 1.2.3.4 * Rebuilt URL to: 1.2.3.4/ * Trying 1.2.3.4... * TCP_NODELAY set * Connected to 1.2.3.4 (10.0.0.10) port 80 (#0) > GET / HTTP/1.1 > Host: 1.2.3.4 > User-Agent: curl/7.58.0 > Accept: */* [...] After; with getpeername() reverse xlation: # curl --verbose 1.2.3.4 * Rebuilt URL to: 1.2.3.4/ * Trying 1.2.3.4... * TCP_NODELAY set * Connected to 1.2.3.4 (1.2.3.4) port 80 (#0) > GET / HTTP/1.1 > Host: 1.2.3.4 > User-Agent: curl/7.58.0 > Accept: */* [...] Originally, I had both under a BPF_CGROUP_INET{4,6}_GETNAME type and exposed peer to the context similar as in inet{,6}_getname() fashion, but API-wise this is suboptimal as it always enforces programs having to test for ctx->peer which can easily be missed, hence BPF_CGROUP_INET{4,6}_GET{PEER,SOCK}NAME split. Similarly, the checked return code is on tnum_range(1, 1), but if a use case comes up in future, it can easily be changed to return an error code instead. Helper and ctx member access is the same as with connect/sendmsg/etc hooks. [0] https://github.com/cilium/cilium/blob/master/bpf/bpf_sock.c Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/61a479d759b2482ae3efb45546490bacd796a220.1589841594.git.daniel@iogearbox.net --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b9b8a0f63b91..97e1fd19ff58 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -220,6 +220,10 @@ enum bpf_attach_type { BPF_MODIFY_RETURN, BPF_LSM_MAC, BPF_TRACE_ITER, + BPF_CGROUP_INET4_GETPEERNAME, + BPF_CGROUP_INET6_GETPEERNAME, + BPF_CGROUP_INET4_GETSOCKNAME, + BPF_CGROUP_INET6_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From faf1d25440d6ad06d509dada4b6fe62fea844370 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:44 +0200 Subject: loop: Clean up LOOP_SET_STATUS lo_flags handling LOOP_SET_STATUS(64) will actually allow some lo_flags to be modified; in particular, LO_FLAGS_AUTOCLEAR can be set and cleared, whereas LO_FLAGS_PARTSCAN can be set to request a partition scan. Make this explicit by updating the UAPI to include the flags that can be set/cleared using this ioctl. The implementation can then blindly take over the passed in flags, and use the previous flags for those flags that can't be set / cleared using LOOP_SET_STATUS. Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 080a8df134ef..6b32fee80ce0 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -25,6 +25,12 @@ enum { LO_FLAGS_DIRECT_IO = 16, }; +/* LO_FLAGS that can be set using LOOP_SET_STATUS(64) */ +#define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN) + +/* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */ +#define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR) + #include /* for __kernel_old_dev_t */ #include /* for __u64 */ @@ -37,7 +43,7 @@ struct loop_info { int lo_offset; int lo_encrypt_type; int lo_encrypt_key_size; /* ioctl w/o */ - int lo_flags; /* ioctl r/o */ + int lo_flags; char lo_name[LO_NAME_SIZE]; unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ unsigned long lo_init[2]; @@ -53,7 +59,7 @@ struct loop_info64 { __u32 lo_number; /* ioctl r/o */ __u32 lo_encrypt_type; __u32 lo_encrypt_key_size; /* ioctl w/o */ - __u32 lo_flags; /* ioctl r/o */ + __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; __u8 lo_crypt_name[LO_NAME_SIZE]; __u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ -- cgit v1.2.3 From 3448914e8cc550ba792d4ccc74471d1ca4293aae Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 May 2020 15:38:45 +0200 Subject: loop: Add LOOP_CONFIGURE ioctl This allows userspace to completely setup a loop device with a single ioctl, removing the in-between state where the device can be partially configured - eg the loop device has a backing file associated with it, but is reading from the wrong offset. Besides removing the intermediate state, another big benefit of this ioctl is that LOOP_SET_STATUS can be slow; the main reason for this slowness is that LOOP_SET_STATUS(64) calls blk_mq_freeze_queue() to freeze the associated queue; this requires waiting for RCU synchronization, which I've measured can take about 15-20ms on this device on average. In addition to doing what LOOP_SET_STATUS can do, LOOP_CONFIGURE can also be used to: - Set the correct block size immediately by setting loop_config.block_size (avoids LOOP_SET_BLOCK_SIZE) - Explicitly request direct I/O mode by setting LO_FLAGS_DIRECT_IO in loop_config.info.lo_flags (avoids LOOP_SET_DIRECT_IO) - Explicitly request read-only mode by setting LO_FLAGS_READ_ONLY in loop_config.info.lo_flags Here's setting up ~70 regular loop devices with an offset on an x86 Android device, using LOOP_SET_FD and LOOP_SET_STATUS: vsoc_x86:/system/apex # time for i in `seq 30 100`; do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done 0m03.40s real 0m00.02s user 0m00.03s system Here's configuring ~70 devices in the same way, but using a modified losetup that uses the new LOOP_CONFIGURE ioctl: vsoc_x86:/system/apex # time for i in `seq 30 100`; do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done 0m01.94s real 0m00.01s user 0m00.01s system Signed-off-by: Martijn Coenen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 6b32fee80ce0..24a1c45bd1ae 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -31,6 +31,10 @@ enum { /* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */ #define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR) +/* LO_FLAGS that can be set using LOOP_CONFIGURE */ +#define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \ + | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO) + #include /* for __kernel_old_dev_t */ #include /* for __u64 */ @@ -66,6 +70,22 @@ struct loop_info64 { __u64 lo_init[2]; }; +/** + * struct loop_config - Complete configuration for a loop device. + * @fd: fd of the file to be used as a backing file for the loop device. + * @block_size: block size to use; ignored if 0. + * @info: struct loop_info64 to configure the loop device with. + * + * This structure is used with the LOOP_CONFIGURE ioctl, and can be used to + * atomically setup and configure all loop device parameters at once. + */ +struct loop_config { + __u32 fd; + __u32 block_size; + struct loop_info64 info; + __u64 __reserved[8]; +}; + /* * Loop filter types */ @@ -96,6 +116,7 @@ struct loop_info64 { #define LOOP_SET_CAPACITY 0x4C07 #define LOOP_SET_DIRECT_IO 0x4C08 #define LOOP_SET_BLOCK_SIZE 0x4C09 +#define LOOP_CONFIGURE 0x4C0A /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 -- cgit v1.2.3 From d8bed686ab96169ac80b497d1cbed89300d97f83 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 19 May 2020 22:45:20 +0800 Subject: net: psample: Add tunnel support Currently, psample can only send the packet bits after decapsulation. The tunnel information is lost. Add the tunnel support. If the sampled packet has no tunnel info, the behavior is the same as before. If it has, add a nested metadata field named PSAMPLE_ATTR_TUNNEL and include the tunnel subfields if applicable. Increase the metadata length for sampled packet with the tunnel info. If new subfields of tunnel info should be included, update the metadata length accordingly. Signed-off-by: Chris Mi Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/psample.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index ce1116cff53d..aea26ab1431c 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -11,6 +11,7 @@ enum { PSAMPLE_ATTR_GROUP_SEQ, PSAMPLE_ATTR_SAMPLE_RATE, PSAMPLE_ATTR_DATA, + PSAMPLE_ATTR_TUNNEL, /* commands attributes */ PSAMPLE_ATTR_GROUP_REFCOUNT, @@ -25,6 +26,27 @@ enum psample_command { PSAMPLE_CMD_DEL_GROUP, }; +enum psample_tunnel_key_attr { + PSAMPLE_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ + PSAMPLE_TUNNEL_KEY_ATTR_TOS, /* u8 Tunnel IP ToS. */ + PSAMPLE_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ + PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ + PSAMPLE_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + PSAMPLE_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ + PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ + PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ + PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ + PSAMPLE_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested VXLAN opts* */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ + PSAMPLE_TUNNEL_KEY_ATTR_PAD, + PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE, /* No argument. IPV4_INFO_BRIDGE mode.*/ + __PSAMPLE_TUNNEL_KEY_ATTR_MAX +}; + /* Can be overridden at runtime by module option */ #define PSAMPLE_ATTR_MAX (__PSAMPLE_ATTR_MAX - 1) -- cgit v1.2.3 From 8066021915924f58ed338bf38208215f5a7355f6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 20 May 2020 08:29:14 +0200 Subject: ethtool: provide UAPI for PHY Signal Quality Index (SQI) Signal Quality Index is a mandatory value required by "OPEN Alliance SIG" for the 100Base-T1 PHYs [1]. This indicator can be used for cable integrity diagnostic and investigating other noise sources and implement by at least two vendors: NXP[2] and TI[3]. [1] http://www.opensig.org/download/document/218/Advanced_PHY_features_for_automotive_Ethernet_V1.0.pdf [2] https://www.nxp.com/docs/en/data-sheet/TJA1100.pdf [3] https://www.ti.com/product/DP83TC811R-Q1 Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 2881af411f76..e6f109b76c9a 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -232,6 +232,8 @@ enum { ETHTOOL_A_LINKSTATE_UNSPEC, ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ ETHTOOL_A_LINKSTATE_LINK, /* u8 */ + ETHTOOL_A_LINKSTATE_SQI, /* u32 */ + ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_LINKSTATE_CNT, -- cgit v1.2.3 From 38428d68719c454d269cb03b776d8a4b0ad66111 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 21 May 2020 22:26:13 -0700 Subject: nexthop: support for fdb ecmp nexthops This patch introduces ecmp nexthops and nexthop groups for mac fdb entries. In subsequent patches this is used by the vxlan driver fdb entries. The use case is E-VPN multihoming [1,2,3] which requires bridged vxlan traffic to be load balanced to remote switches (vteps) belonging to the same multi-homed ethernet segment (This is analogous to a multi-homed LAG but over vxlan). Changes include new nexthop flag NHA_FDB for nexthops referenced by fdb entries. These nexthops only have ip. This patch includes appropriate checks to avoid routes referencing such nexthops. example: $ip nexthop add id 12 via 172.16.1.2 fdb $ip nexthop add id 13 via 172.16.1.3 fdb $ip nexthop add id 102 group 12/13 fdb $bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 101 self [1] E-VPN https://tools.ietf.org/html/rfc7432 [2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365 [3] LPC talk with mention of nexthop groups for L2 ecmp http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf v4 - fixed uninitialized variable reported by kernel test robot Reported-by: kernel test robot Signed-off-by: Roopa Prabhu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 7b61867e9848..2d4a1e784cf0 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -49,6 +49,9 @@ enum { NHA_GROUPS, /* flag; only return nexthop groups in dump */ NHA_MASTER, /* u32; only return nexthops with given master dev */ + NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ + /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ + __NHA_MAX, }; -- cgit v1.2.3 From 1274e1cc42264d4e629841e4f182795cb0becfd2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 21 May 2020 22:26:14 -0700 Subject: vxlan: ecmp support for mac fdb entries Todays vxlan mac fdb entries can point to multiple remote ips (rdsts) with the sole purpose of replicating broadcast-multicast and unknown unicast packets to those remote ips. E-VPN multihoming [1,2,3] requires bridged vxlan traffic to be load balanced to remote switches (vteps) belonging to the same multi-homed ethernet segment (E-VPN multihoming is analogous to multi-homed LAG implementations, but with the inter-switch peerlink replaced with a vxlan tunnel). In other words it needs support for mac ecmp. Furthermore, for faster convergence, E-VPN multihoming needs the ability to update fdb ecmp nexthops independent of the fdb entries. New route nexthop API is perfect for this usecase. This patch extends the vxlan fdb code to take a nexthop id pointing to an ecmp nexthop group. Changes include: - New NDA_NH_ID attribute for fdbs - Use the newly added fdb nexthop groups - makes vxlan rdsts and nexthop handling code mutually exclusive - since this is a new use-case and the requirement is for ecmp nexthop groups, the fdb add and update path checks that the nexthop is really an ecmp nexthop group. This check can be relaxed in the future, if we want to introduce replication fdb nexthop groups and allow its use in lieu of current rdst lists. - fdb update requests with nexthop id's only allowed for existing fdb's that have nexthop id's - learning will not override an existing fdb entry with nexthop group - I have wrapped the switchdev offload code around the presence of rdst [1] E-VPN RFC https://tools.ietf.org/html/rfc7432 [2] E-VPN with vxlan https://tools.ietf.org/html/rfc8365 [3] http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf Includes a null check fix in vxlan_xmit from Nikolay v2 - Fixed build issue: Reported-by: kbuild test robot Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index cd144e3099a3..eefcda8ca44e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -29,6 +29,7 @@ enum { NDA_LINK_NETNSID, NDA_SRC_VNI, NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, __NDA_MAX }; -- cgit v1.2.3 From 31344b2fcead3239c5b801016d9bae82506b92c2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 11 May 2020 14:49:10 +0200 Subject: btrfs: remove more obsolete v0 extent ref declarations The extent references v0 have been superseded long time go, there are some unused declarations of access helpers. We can safely remove them now. The struct btrfs_extent_ref_v0 is not used anywhere, but struct btrfs_extent_item_v0 is still part of a backward compatibility check in relocation.c and thus not removed. Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 8e322e2c7e78..a3f3975df0de 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -519,15 +519,6 @@ struct btrfs_extent_inline_ref { __le64 offset; } __attribute__ ((__packed__)); -/* old style backrefs item */ -struct btrfs_extent_ref_v0 { - __le64 root; - __le64 generation; - __le64 objectid; - __le32 count; -} __attribute__ ((__packed__)); - - /* dev extents record free space on individual devices. The owner * field points back to the chunk allocation mapping tree that allocated * the extent. The chunk tree uuid field is a way to double check the owner -- cgit v1.2.3 From 61aec25a6db5d0c2e8ab5da6d2d152269d0d9d69 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 26 May 2020 14:29:04 +0200 Subject: cls_flower: Support filtering on multiple MPLS Label Stack Entries With struct flow_dissector_key_mpls now recording the first FLOW_DIS_MPLS_MAX labels, we can extend Flower to filter on any of these LSEs independently. In order to avoid creating new netlink attributes for every possible depth, let's define a new TCA_FLOWER_KEY_MPLS_OPTS nested attribute that contains the list of LSEs to match. Each LSE is represented by another attribute, TCA_FLOWER_KEY_MPLS_OPTS_LSE, which then contains the attributes representing the depth and the MPLS fields to match at this depth (label, TTL, etc.). For each MPLS field, the mask is always set to all-ones, as this is what the original API did. We could allow user configurable masks in the future if there is demand for more flexibility. The new API also allows to only specify an LSE depth. In that case, Flower only verifies that the MPLS label stack depth is greater or equal to the provided depth (that is, an LSE exists at this depth). Filters that only match on one (or more) fields of the first LSE are dumped using the old netlink attributes, to avoid confusing user space programs that don't understand the new API. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index fc672b232437..7576209d96f9 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -576,6 +576,8 @@ enum { TCA_FLOWER_KEY_CT_LABELS, /* u128 */ TCA_FLOWER_KEY_CT_LABELS_MASK, /* u128 */ + TCA_FLOWER_KEY_MPLS_OPTS, + __TCA_FLOWER_MAX, }; @@ -640,6 +642,27 @@ enum { #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \ (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1) +enum { + TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC, + TCA_FLOWER_KEY_MPLS_OPTS_LSE, + __TCA_FLOWER_KEY_MPLS_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_MPLS_OPTS_MAX (__TCA_FLOWER_KEY_MPLS_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_MPLS_OPT_LSE_UNSPEC, + TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH, + TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL, + TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS, + TCA_FLOWER_KEY_MPLS_OPT_LSE_TC, + TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + __TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, +}; + +#define TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX \ + (__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX - 1) + enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), -- cgit v1.2.3 From a331172b156b23e83dfb556ade0ca23426c3f149 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 27 May 2020 00:21:37 +0200 Subject: net: ethtool: Add attributes for cable test TDR data Some Ethernet PHYs can return the raw time domain reflectromatry data. Add the attributes to allow this data to be requested and returned via netlink ethtool. Signed-off-by: Andrew Lunn v2: m -> cm Report what the PHY actually used for start/stop/step. Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 63 ++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index e6f109b76c9a..739faa7070c6 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -40,6 +40,7 @@ enum { ETHTOOL_MSG_EEE_SET, ETHTOOL_MSG_TSINFO_GET, ETHTOOL_MSG_CABLE_TEST_ACT, + ETHTOOL_MSG_CABLE_TEST_TDR_ACT, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -76,6 +77,7 @@ enum { ETHTOOL_MSG_EEE_NTF, ETHTOOL_MSG_TSINFO_GET_REPLY, ETHTOOL_MSG_CABLE_TEST_NTF, + ETHTOOL_MSG_CABLE_TEST_TDR_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -478,6 +480,67 @@ enum { ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) }; +/* CABLE TEST TDR */ + +enum { + ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CNT, + ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 +}; + +/* CABLE TEST TDR NOTIFY */ + +enum { + ETHTOOL_A_CABLE_AMPLITUDE_UNSPEC, + ETHTOOL_A_CABLE_AMPLITUDE_PAIR, /* u8 */ + ETHTOOL_A_CABLE_AMPLITUDE_mV, /* s16 */ + + __ETHTOOL_A_CABLE_AMPLITUDE_CNT, + ETHTOOL_A_CABLE_AMPLITUDE_MAX = (__ETHTOOL_A_CABLE_AMPLITUDE_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_PULSE_UNSPEC, + ETHTOOL_A_CABLE_PULSE_mV, /* s16 */ + + __ETHTOOL_A_CABLE_PULSE_CNT, + ETHTOOL_A_CABLE_PULSE_MAX = (__ETHTOOL_A_CABLE_PULSE_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_STEP_UNSPEC, + ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE, /* u32 */ + ETHTOOL_A_CABLE_STEP_LAST_DISTANCE, /* u32 */ + ETHTOOL_A_CABLE_STEP_STEP_DISTANCE, /* u32 */ + + __ETHTOOL_A_CABLE_STEP_CNT, + ETHTOOL_A_CABLE_STEP_MAX = (__ETHTOOL_A_CABLE_STEP_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_TDR_NEST_UNSPEC, + ETHTOOL_A_CABLE_TDR_NEST_STEP, /* nest - ETHTTOOL_A_CABLE_STEP */ + ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE, /* nest - ETHTOOL_A_CABLE_AMPLITUDE */ + ETHTOOL_A_CABLE_TDR_NEST_PULSE, /* nest - ETHTOOL_A_CABLE_PULSE */ + + __ETHTOOL_A_CABLE_TDR_NEST_CNT, + ETHTOOL_A_CABLE_TDR_NEST_MAX = (__ETHTOOL_A_CABLE_TDR_NEST_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From f2bc8ad31a7f814237bc6301d59296d76505a688 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 27 May 2020 00:21:41 +0200 Subject: net: ethtool: Allow PHY cable test TDR data to configured Allow the user to configure where on the cable the TDR data should be retrieved, in terms of first and last sample, and the step between samples. Also add the ability to ask for TDR data for just one pair. If this configuration is not provided, it defaults to 1-150m at 1m intervals for all pairs. Signed-off-by: Andrew Lunn v3: Move the TDR configuration into a structure Add a range check on step Use NL_SET_ERR_MSG_ATTR() when appropriate Move TDR configuration into a nest Document attributes in the request Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 739faa7070c6..fc9051f2eeac 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -482,9 +482,22 @@ enum { /* CABLE TEST TDR */ +enum { + ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 +}; + enum { ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CNT, -- cgit v1.2.3 From 33462e68231bccfe563a87614f4c4dd5d333837c Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Fri, 24 Apr 2020 14:29:03 +0300 Subject: cfg80211: add support for TID specific AMSDU configuration This patch adds support to control per TID MSDU aggregation using the NL80211_TID_CONFIG_ATTR_AMSDU_CTRL attribute. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200424112905.26770-4-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9679d561f7d0..1ccb0bf657ec 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4844,12 +4844,15 @@ enum nl80211_tid_config { * &NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and * the max value is advertised by the driver in this attribute on * output in wiphy capabilities. - * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable aggregation for the TIDs - * specified in %NL80211_TID_CONFIG_ATTR_TIDS. Its type is u8, using - * the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable MPDU aggregation + * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. + * Its type is u8, using the values from &nl80211_tid_config. * @NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL: Enable/Disable RTS_CTS for the TIDs * specified in %NL80211_TID_CONFIG_ATTR_TIDS. It is u8 type, using * the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_AMSDU_CTRL: Enable/Disable MSDU aggregation + * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. + * Its type is u8, using the values from &nl80211_tid_config. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4863,6 +4866,7 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_RETRY_LONG, NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL, + NL80211_TID_CONFIG_ATTR_AMSDU_CTRL, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, -- cgit v1.2.3 From c03369558c435f7e82f7c06b0173fa73c1ed15c0 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Fri, 24 Apr 2020 14:29:04 +0300 Subject: nl80211: simplify peer specific TID configuration Current rule for applying TID configuration for specific peer looks overly complicated. No need to reject new TID configuration when override flag is specified. Another call with the same TID configuration, but without override flag, allows to apply new configuration anyway. Use the same approach as for the 'all peers' case: if override flag is specified, then reset existing TID configuration and immediately apply a new one. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200424112905.26770-5-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1ccb0bf657ec..d1b1d9e49887 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4823,12 +4823,10 @@ enum nl80211_tid_config { * (%NL80211_TID_CONFIG_ATTR_TIDS, %NL80211_TID_CONFIG_ATTR_OVERRIDE). * @NL80211_TID_CONFIG_ATTR_PEER_SUPP: same as the previous per-vif one, but * per peer instead. - * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if no peer - * is selected, if set indicates that the new configuration overrides - * all previous peer configurations, otherwise previous peer specific - * configurations should be left untouched. If peer is selected then - * it will reset particular TID configuration of that peer and it will - * not accept other TID config attributes along with peer. + * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if set indicates + * that the new configuration overrides all previous peer + * configurations, otherwise previous peer specific configurations + * should be left untouched. * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs (bit 0 to 7) * Its type is u16. * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. -- cgit v1.2.3 From 942ba88ba9c87f5e225574f1f0d6548f0105ed73 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 30 Apr 2020 10:25:51 -0700 Subject: nl80211: add KHz frequency offset for most wifi commands cfg80211 recently gained the ability to understand a frequency offset component in KHz. Expose this in nl80211 through the new attributes NL80211_ATTR_WIPHY_FREQ_OFFSET, NL80211_FREQUENCY_ATTR_OFFSET, NL80211_ATTR_CENTER_FREQ1_OFFSET, and NL80211_BSS_FREQUENCY_OFFSET. These add support to send and receive a KHz offset component with the following NL80211 commands: - NL80211_CMD_FRAME - NL80211_CMD_GET_SCAN - NL80211_CMD_AUTHENTICATE - NL80211_CMD_ASSOCIATE - NL80211_CMD_CONNECT Along with any other command which takes a chandef, ie: - NL80211_CMD_SET_CHANNEL - NL80211_CMD_SET_WIPHY - NL80211_CMD_START_AP - NL80211_CMD_RADAR_DETECT - NL80211_CMD_NOTIFY_RADAR - NL80211_CMD_CHANNEL_SWITCH - NL80211_JOIN_IBSS - NL80211_CMD_REMAIN_ON_CHANNEL - NL80211_CMD_JOIN_OCB - NL80211_CMD_JOIN_MESH - NL80211_CMD_TDLS_CHANNEL_SWITCH If the driver advertises a band containing channels with frequency offset, it must also verify support for frequency offset channels in its cfg80211 ops, or return an error. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200430172554.18383-3-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 50 +++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d1b1d9e49887..b1cd132c1d27 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -296,13 +296,14 @@ * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, - * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the - * attributes determining the channel width; this is used for setting - * monitor mode channel), %NL80211_ATTR_WIPHY_RETRY_SHORT, - * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD. - * However, for setting the channel, see %NL80211_CMD_SET_CHANNEL - * instead, the support here is for backward compatibility only. + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, + * %NL80211_ATTR_WIPHY_FREQ_OFFSET (and the attributes determining the + * channel width; this is used for setting monitor mode channel), + * %NL80211_ATTR_WIPHY_RETRY_SHORT, %NL80211_ATTR_WIPHY_RETRY_LONG, + * %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, and/or + * %NL80211_ATTR_WIPHY_RTS_THRESHOLD. However, for setting the channel, + * see %NL80211_CMD_SET_CHANNEL instead, the support here is for backward + * compatibility only. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. @@ -351,7 +352,8 @@ * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. * The channel to use can be set on the interface or be given using the - * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. + * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_FREQ_OFFSET, and the + * attributes determining channel width. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP @@ -536,11 +538,12 @@ * interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and * BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify * the SSID (mainly for association, but is included in authentication - * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ is used - * to specify the frequence of the channel in MHz. %NL80211_ATTR_AUTH_TYPE - * is used to specify the authentication type. %NL80211_ATTR_IE is used to - * define IEs (VendorSpecificInfo, but also including RSN IE and FT IEs) - * to be added to the frame. + * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ + + * %NL80211_ATTR_WIPHY_FREQ_OFFSET is used to specify the frequence of the + * channel in MHz. %NL80211_ATTR_AUTH_TYPE is used to specify the + * authentication type. %NL80211_ATTR_IE is used to define IEs + * (VendorSpecificInfo, but also including RSN IE and FT IEs) to be added + * to the frame. * When used as an event, this reports reception of an Authentication * frame in station and IBSS modes when the local MLME processed the * frame, i.e., it was for the local STA and was received in correct @@ -595,8 +598,9 @@ * requests to connect to a specified network but without separating * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association - * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP, - * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, + * %NL80211_ATTR_USE_MFP, %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, + * %NL80211_ATTR_WIPHY_FREQ_OFFSET, %NL80211_ATTR_CONTROL_PORT, * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, * %NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and @@ -1433,7 +1437,8 @@ enum nl80211_commands { * of &enum nl80211_chan_width, describing the channel width. See the * documentation of the enum for more information. * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the - * channel, used for anything but 20 MHz bandwidth + * channel, used for anything but 20 MHz bandwidth. In S1G this is the + * operating channel center frequency. * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the * channel, used only for 80+80 MHz bandwidth * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ @@ -2480,9 +2485,14 @@ enum nl80211_commands { * entry without having to force a disconnection after the PMK timeout. If * no roaming occurs between the reauth threshold and PMK expiration, * disassociation is still forced. - * * @NL80211_ATTR_RECEIVE_MULTICAST: multicast flag for the * %NL80211_CMD_REGISTER_FRAME command, see the description there. + * @NL80211_ATTR_WIPHY_FREQ_OFFSET: offset of the associated + * %NL80211_ATTR_WIPHY_FREQ in positive KHz. Only valid when supplied with + * an %NL80211_ATTR_WIPHY_FREQ_OFFSET. + * @NL80211_ATTR_CENTER_FREQ1_OFFSET: Center frequency offset in KHz for the + * first channel segment specified in %NL80211_ATTR_CENTER_FREQ1. + * * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2960,6 +2970,8 @@ enum nl80211_attrs { NL80211_ATTR_PMK_REAUTH_THRESHOLD, NL80211_ATTR_RECEIVE_MULTICAST, + NL80211_ATTR_WIPHY_FREQ_OFFSET, + NL80211_ATTR_CENTER_FREQ1_OFFSET, /* add attributes here, update the policy in nl80211.c */ @@ -3682,6 +3694,7 @@ enum nl80211_wmm_rule { * (see &enum nl80211_wmm_rule) * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel * in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_OFFSET: frequency offset in KHz * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -3712,6 +3725,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_10MHZ, NL80211_FREQUENCY_ATTR_WMM, NL80211_FREQUENCY_ATTR_NO_HE, + NL80211_FREQUENCY_ATTR_OFFSET, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4482,6 +4496,7 @@ enum nl80211_bss_scan_width { * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update. * Contains a nested array of signal strength attributes (u8, dBm), * using the nesting index as the antenna number. + * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -4506,6 +4521,7 @@ enum nl80211_bss { NL80211_BSS_PARENT_TSF, NL80211_BSS_PARENT_BSSID, NL80211_BSS_CHAIN_SIGNAL, + NL80211_BSS_FREQUENCY_OFFSET, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From 2032f3b2f943256ff40df23182913dfc7e73ec6a Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 30 Apr 2020 10:25:52 -0700 Subject: nl80211: support scan frequencies in KHz If the driver advertises NL80211_EXT_FEATURE_SCAN_FREQ_KHZ userspace can omit NL80211_ATTR_SCAN_FREQUENCIES in favor of an NL80211_ATTR_SCAN_FREQ_KHZ. To get scan results in KHz userspace must also set the NL80211_SCAN_FLAG_FREQ_KHZ. This lets nl80211 remain compatible with older userspaces while not requring and sending redundant (and potentially incorrect) scan frequency sets. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200430172554.18383-4-thomas@adapt-ip.com [use just nla_nest_start() (not _noflag) for NL80211_ATTR_SCAN_FREQ_KHZ] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b1cd132c1d27..47d39b6a073d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2492,7 +2492,7 @@ enum nl80211_commands { * an %NL80211_ATTR_WIPHY_FREQ_OFFSET. * @NL80211_ATTR_CENTER_FREQ1_OFFSET: Center frequency offset in KHz for the * first channel segment specified in %NL80211_ATTR_CENTER_FREQ1. - * + * @NL80211_ATTR_SCAN_FREQ_KHZ: nested attribute with KHz frequencies * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2972,6 +2972,7 @@ enum nl80211_attrs { NL80211_ATTR_RECEIVE_MULTICAST, NL80211_ATTR_WIPHY_FREQ_OFFSET, NL80211_ATTR_CENTER_FREQ1_OFFSET, + NL80211_ATTR_SCAN_FREQ_KHZ, /* add attributes here, update the policy in nl80211.c */ @@ -5723,6 +5724,11 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS: management frame registrations * are possible for multicast frames and those will be reported properly. * + * @NL80211_EXT_FEATURE_SCAN_FREQ_KHZ: This driver supports receiving and + * reporting scan request with %NL80211_ATTR_SCAN_FREQ_KHZ. In order to + * report %NL80211_ATTR_SCAN_FREQ_KHZ, %NL80211_SCAN_FLAG_FREQ_KHZ must be + * included in the scan request. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5776,6 +5782,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_DEL_IBSS_STA, NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT, + NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -5887,6 +5894,9 @@ enum nl80211_timeout_reason { * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to * only have supported rates and no additional capabilities (unless * added by userspace explicitly.) + * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with + * %NL80211_ATTR_SCAN_FREQ_KHZ. This also means + * %NL80211_ATTR_SCAN_FREQUENCIES will not be included. */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, @@ -5902,6 +5912,7 @@ enum nl80211_scan_flags { NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10, NL80211_SCAN_FLAG_RANDOM_SN = 1<<11, NL80211_SCAN_FLAG_MIN_PREQ_CONTENT = 1<<12, + NL80211_SCAN_FLAG_FREQ_KHZ = 1<<13, }; /** -- cgit v1.2.3 From dca9ca2d588bd2c0989c671f048540b82e57cf1e Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Fri, 8 May 2020 16:42:00 +0200 Subject: nl80211: add ability to report TX status for control port TX This adds the necessary capabilities in nl80211 to allow drivers to assign a cookie to control port TX frames (returned via extack in the netlink ACK message of the command) and then later report the frame's status. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200508144202.7678-2-markus.theil@tu-ilmenau.de [use extack cookie instead of explicit message, recombine patches] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 47d39b6a073d..0f324b6b81cc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1164,6 +1164,12 @@ * dropped because it did not include a valid MME MIC while beacon * protection was enabled (BIGTK configured in station mode). * + * @NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS: Report TX status of a control + * port frame transmitted with %NL80211_CMD_CONTROL_PORT_FRAME. + * %NL80211_ATTR_COOKIE identifies the TX command and %NL80211_ATTR_FRAME + * includes the contents of the frame. %NL80211_ATTR_ACK flag is included + * if the recipient acknowledged the frame. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1392,6 +1398,8 @@ enum nl80211_commands { NL80211_CMD_UNPROT_BEACON, + NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -5729,6 +5737,9 @@ enum nl80211_feature_flags { * report %NL80211_ATTR_SCAN_FREQ_KHZ, %NL80211_SCAN_FLAG_FREQ_KHZ must be * included in the scan request. * + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS: The driver + * can report tx status for control port over nl80211 tx operations. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5783,6 +5794,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT, NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 9a5f6488623730dc16cca0836ade23869761adee Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Date: Wed, 13 May 2020 13:41:44 +0530 Subject: nl80211: Add support to configure TID specific Tx rate configuration This patch adds support to configure per TID Tx Rate configuration through NL80211_TID_CONFIG_ATTR_TX_RATE* attributes. And it uses nl80211_parse_tx_bitrate_mask api to validate the Tx rate mask. Signed-off-by: Tamizh Chelvam Link: https://lore.kernel.org/r/1589357504-10175-1-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0f324b6b81cc..c14666b75e57 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4841,6 +4841,17 @@ enum nl80211_tid_config { NL80211_TID_CONFIG_DISABLE, }; +/* enum nl80211_tx_rate_setting - TX rate configuration type + * @NL80211_TX_RATE_AUTOMATIC: automatically determine TX rate + * @NL80211_TX_RATE_LIMITED: limit the TX rate by the TX rate parameter + * @NL80211_TX_RATE_FIXED: fix TX rate to the TX rate parameter + */ +enum nl80211_tx_rate_setting { + NL80211_TX_RATE_AUTOMATIC, + NL80211_TX_RATE_LIMITED, + NL80211_TX_RATE_FIXED, +}; + /* enum nl80211_tid_config_attr - TID specific configuration. * @NL80211_TID_CONFIG_ATTR_PAD: pad attribute for 64-bit values * @NL80211_TID_CONFIG_ATTR_VIF_SUPP: a bitmap (u64) of attributes supported @@ -4876,6 +4887,14 @@ enum nl80211_tid_config { * @NL80211_TID_CONFIG_ATTR_AMSDU_CTRL: Enable/Disable MSDU aggregation * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. * Its type is u8, using the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE: This attribute will be useful + * to notfiy the driver that what type of txrate should be used + * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. using + * the values form &nl80211_tx_rate_setting. + * @NL80211_TID_CONFIG_ATTR_TX_RATE: Data frame TX rate mask should be applied + * with the parameters passed through %NL80211_ATTR_TX_RATES. + * configuration is applied to the data frame for the tid to that connected + * station. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4890,6 +4909,8 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL, NL80211_TID_CONFIG_ATTR_AMSDU_CTRL, + NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, + NL80211_TID_CONFIG_ATTR_TX_RATE, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, -- cgit v1.2.3 From 1b9ae0c92925ac40489be526d67d0010d0724ce0 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 21 May 2020 22:14:22 +0200 Subject: wireless: Use linux/stddef.h instead of stddef.h When compiling inside the kernel include linux/stddef.h instead of stddef.h. When I compile this header file in backports for power PC I run into a conflict with ptrdiff_t. I was unable to reproduce this in mainline kernel. I still would like to fix this problem in the kernel. Fixes: 6989310f5d43 ("wireless: Use offsetof instead of custom macro.") Signed-off-by: Hauke Mehrtens Link: https://lore.kernel.org/r/20200521201422.16493-1-hauke@hauke-m.de Signed-off-by: Johannes Berg --- include/uapi/linux/wireless.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index a2c006a364e0..24f3371ad826 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -74,7 +74,11 @@ #include /* for "struct sockaddr" et al */ #include /* for IFNAMSIZ and co... */ -#include /* for offsetof */ +#ifdef __KERNEL__ +# include /* for offsetof */ +#else +# include /* for offsetof */ +#endif /***************************** VERSION *****************************/ /* -- cgit v1.2.3 From 3234ac664a870e6ea69ae3a57d824cd7edbeacc5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 May 2020 14:06:17 -0700 Subject: /dev/mem: Revoke mappings when a driver claims the region Close the hole of holding a mapping over kernel driver takeover event of a given address range. Commit 90a545e98126 ("restrict /dev/mem to idle io memory ranges") introduced CONFIG_IO_STRICT_DEVMEM with the goal of protecting the kernel against scenarios where a /dev/mem user tramples memory that a kernel driver owns. However, this protection only prevents *new* read(), write() and mmap() requests. Established mappings prior to the driver calling request_mem_region() are left alone. Especially with persistent memory, and the core kernel metadata that is stored there, there are plentiful scenarios for a /dev/mem user to violate the expectations of the driver and cause amplified damage. Teach request_mem_region() to find and shoot down active /dev/mem mappings that it believes it has successfully claimed for the exclusive use of the driver. Effectively a driver call to request_mem_region() becomes a hole-punch on the /dev/mem device. The typical usage of unmap_mapping_range() is part of truncate_pagecache() to punch a hole in a file, but in this case the implementation is only doing the "first half" of a hole punch. Namely it is just evacuating current established mappings of the "hole", and it relies on the fact that /dev/mem establishes mappings in terms of absolute physical address offsets. Once existing mmap users are invalidated they can attempt to re-establish the mapping, or attempt to continue issuing read(2) / write(2) to the invalidated extent, but they will then be subject to the CONFIG_IO_STRICT_DEVMEM checking that can block those subsequent accesses. Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Kees Cook Cc: Matthew Wilcox Cc: Russell King Cc: Andrew Morton Cc: Greg Kroah-Hartman Fixes: 90a545e98126 ("restrict /dev/mem to idle io memory ranges") Signed-off-by: Dan Williams Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/159009507306.847224.8502634072429766747.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index d78064007b17..f3956fc11de6 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -94,6 +94,7 @@ #define BALLOON_KVM_MAGIC 0x13661366 #define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ +#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 -- cgit v1.2.3 From 20f6a05ef63594feb0c6dfbd629da0448b43124d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 27 May 2020 12:34:30 +0000 Subject: bridge: mrp: Rework the MRP netlink interface This patch reworks the MRP netlink interface. Before, each attribute represented a binary structure which made it hard to be extended. Therefore update the MRP netlink interface such that each existing attribute to be a nested attribute which contains the fields of the binary structures. In this way the MRP netlink interface can be extended without breaking the backwards compatibility. It is also using strict checking for attributes under the MRP top attribute. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 64 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index bd8c95488f16..5a43eb86c93b 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -169,17 +169,69 @@ enum { __IFLA_BRIDGE_MRP_MAX, }; +#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_INSTANCE_UNSPEC, + IFLA_BRIDGE_MRP_INSTANCE_RING_ID, + IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX, + IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX, + __IFLA_BRIDGE_MRP_INSTANCE_MAX, +}; + +#define IFLA_BRIDGE_MRP_INSTANCE_MAX (__IFLA_BRIDGE_MRP_INSTANCE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC, + IFLA_BRIDGE_MRP_PORT_STATE_STATE, + __IFLA_BRIDGE_MRP_PORT_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_PORT_STATE_MAX (__IFLA_BRIDGE_MRP_PORT_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_PORT_ROLE_ROLE, + __IFLA_BRIDGE_MRP_PORT_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_PORT_ROLE_MAX (__IFLA_BRIDGE_MRP_PORT_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_RING_STATE_UNSPEC, + IFLA_BRIDGE_MRP_RING_STATE_RING_ID, + IFLA_BRIDGE_MRP_RING_STATE_STATE, + __IFLA_BRIDGE_MRP_RING_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_RING_STATE_MAX (__IFLA_BRIDGE_MRP_RING_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_RING_ROLE_RING_ID, + IFLA_BRIDGE_MRP_RING_ROLE_ROLE, + __IFLA_BRIDGE_MRP_RING_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_RING_ROLE_MAX (__IFLA_BRIDGE_MRP_RING_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_START_TEST_UNSPEC, + IFLA_BRIDGE_MRP_START_TEST_RING_ID, + IFLA_BRIDGE_MRP_START_TEST_INTERVAL, + IFLA_BRIDGE_MRP_START_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_START_TEST_PERIOD, + __IFLA_BRIDGE_MRP_START_TEST_MAX, +}; + +#define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1) + struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; __u32 s_ifindex; }; -struct br_mrp_port_role { - __u32 ring_id; - __u32 role; -}; - struct br_mrp_ring_state { __u32 ring_id; __u32 ring_state; @@ -197,8 +249,6 @@ struct br_mrp_start_test { __u32 period; }; -#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1) - struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; -- cgit v1.2.3 From cb8aa9a3affb7d23b11b11fbed41e2feaabc4b0a Mon Sep 17 00:00:00 2001 From: Romain Bellan Date: Mon, 4 May 2020 21:34:29 +0200 Subject: netfilter: ctnetlink: add kernel side filtering for dump Conntrack dump does not support kernel side filtering (only get exists, but it returns only one entry. And user has to give a full valid tuple) It means that userspace has to implement filtering after receiving many irrelevant entries, consuming resources (conntrack table is sometimes very huge, much more than a routing table for example). This patch adds filtering in kernel side. To achieve this goal, we: * Add a new CTA_FILTER netlink attributes, actually a flag list to parametize filtering * Convert some *nlattr_to_tuple() functions, to allow a partial parsing of CTA_TUPLE_ORIG and CTA_TUPLE_REPLY (so nf_conntrack_tuple it not fully set) Filtering is now possible on: * IP SRC/DST values * Ports for TCP and UDP flows * IMCP(v6) codes types and IDs Filtering is done as an "AND" operator. For example, when flags PROTO_SRC_PORT, PROTO_NUM and IP_SRC are sets, only entries matching all values are dumped. Changes since v1: Set NLM_F_DUMP_FILTERED in nlm flags if entries are filtered Changes since v2: Move several constants to nf_internals.h Move a fix on netlink values check in a separate patch Add a check on not-supported flags Return EOPNOTSUPP if CDA_FILTER is set in ctnetlink_flush_conntrack (not yet implemented) Code style issues Changes since v3: Fix compilation warning reported by kbuild test robot Changes since v4: Fix a regression introduced in v3 (returned EINVAL for valid netlink messages without CTA_MARK) Changes since v5: Change definition of CTA_FILTER_F_ALL Fix a regression when CTA_TUPLE_ZONE is not set Signed-off-by: Romain Bellan Signed-off-by: Florent Fourcot Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 1d41810d17e2..262881792671 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -55,6 +55,7 @@ enum ctattr_type { CTA_LABELS, CTA_LABELS_MASK, CTA_SYNPROXY, + CTA_FILTER, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -276,4 +277,12 @@ enum ctattr_expect_stats { }; #define CTA_STATS_EXP_MAX (__CTA_STATS_EXP_MAX - 1) +enum ctattr_filter { + CTA_FILTER_UNSPEC, + CTA_FILTER_ORIG_FLAGS, + CTA_FILTER_REPLY_FLAGS, + __CTA_FILTER_MAX +}; +#define CTA_FILTER_MAX (__CTA_FILTER_MAX - 1) + #endif /* _IPCONNTRACK_NETLINK_H */ -- cgit v1.2.3 From 83fc5dd57f86c3ec7d6d22565a6ff6c948853b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Mon, 11 May 2020 18:19:02 +0200 Subject: mmc: fix compilation of user API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definitions of MMC_IOC_CMD and of MMC_IOC_MULTI_CMD rely on MMC_BLOCK_MAJOR: #define MMC_IOC_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_cmd) #define MMC_IOC_MULTI_CMD _IOWR(MMC_BLOCK_MAJOR, 1, struct mmc_ioc_multi_cmd) However, MMC_BLOCK_MAJOR is defined in linux/major.h and linux/mmc/ioctl.h did not include it. Signed-off-by: Jérôme Pouiller Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200511161902.191405-1-Jerome.Pouiller@silabs.com Signed-off-by: Ulf Hansson --- include/uapi/linux/mmc/ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 00c08120f3ba..27a39847d55c 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -3,6 +3,7 @@ #define LINUX_MMC_IOCTL_H #include +#include struct mmc_ioc_cmd { /* -- cgit v1.2.3 From a8a24f3f6e38103b77cf399c38eb54e1219d00d6 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:47 +0530 Subject: vfio: UAPI for migration interface for device state - Defined MIGRATION region type and sub-type. - Defined vfio_device_migration_info structure which will be placed at the 0th offset of migration region to get/set VFIO device related information. Defined members of structure and usage on read/write access. - Defined device states and state transition details. - Defined sequence to be followed while saving and resuming VFIO device. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 228 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 015516bcfaa3..ad9bb5af3463 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -305,6 +305,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) +#define VFIO_REGION_TYPE_MIGRATION (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -379,6 +380,233 @@ struct vfio_region_gfx_edid { /* sub-types for VFIO_REGION_TYPE_CCW */ #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) +/* sub-types for VFIO_REGION_TYPE_MIGRATION */ +#define VFIO_REGION_SUBTYPE_MIGRATION (1) + +/* + * The structure vfio_device_migration_info is placed at the 0th offset of + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related + * migration information. Field accesses from this structure are only supported + * at their native width and alignment. Otherwise, the result is undefined and + * vendor drivers should return an error. + * + * device_state: (read/write) + * - The user application writes to this field to inform the vendor driver + * about the device state to be transitioned to. + * - The vendor driver should take the necessary actions to change the + * device state. After successful transition to a given state, the + * vendor driver should return success on write(device_state, state) + * system call. If the device state transition fails, the vendor driver + * should return an appropriate -errno for the fault condition. + * - On the user application side, if the device state transition fails, + * that is, if write(device_state, state) returns an error, read + * device_state again to determine the current state of the device from + * the vendor driver. + * - The vendor driver should return previous state of the device unless + * the vendor driver has encountered an internal error, in which case + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. + * - The user application must use the device reset ioctl to recover the + * device from VFIO_DEVICE_STATE_ERROR state. If the device is + * indicated to be in a valid device state by reading device_state, the + * user application may attempt to transition the device to any valid + * state reachable from the current state or terminate itself. + * + * device_state consists of 3 bits: + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, + * it indicates the _STOP state. When the device state is changed to + * _STOP, driver should stop the device before write() returns. + * - If bit 1 is set, it indicates the _SAVING state, which means that the + * driver should start gathering device state information that will be + * provided to the VFIO user application to save the device's state. + * - If bit 2 is set, it indicates the _RESUMING state, which means that + * the driver should prepare to resume the device. Data provided through + * the migration region should be used to resume the device. + * Bits 3 - 31 are reserved for future use. To preserve them, the user + * application should perform a read-modify-write operation on this + * field when modifying the specified bits. + * + * +------- _RESUMING + * |+------ _SAVING + * ||+----- _RUNNING + * ||| + * 000b => Device Stopped, not saving or resuming + * 001b => Device running, which is the default state + * 010b => Stop the device & save the device state, stop-and-copy state + * 011b => Device running and save the device state, pre-copy state + * 100b => Device stopped and the device state is resuming + * 101b => Invalid state + * 110b => Error state + * 111b => Invalid state + * + * State transitions: + * + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP + * (100b) (001b) (011b) (010b) (000b) + * 0. Running or default state + * | + * + * 1. Normal Shutdown (optional) + * |------------------------------------->| + * + * 2. Save the state or suspend + * |------------------------->|---------->| + * + * 3. Save the state during live migration + * |----------->|------------>|---------->| + * + * 4. Resuming + * |<---------| + * + * 5. Resumed + * |--------->| + * + * 0. Default state of VFIO device is _RUNNNG when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. The vendor driver must support this transition but + * must not require it. + * 2. When the user application saves state or suspends the application, the + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. + * On state transition from _RUNNING to stop-and-copy, driver must stop the + * device, save the device state and send it to the application through the + * migration region. The sequence to be followed for such transition is given + * below. + * 3. In live migration of user application, the state transitions from _RUNNING + * to pre-copy, to stop-and-copy, and to _STOP. + * On state transition from _RUNNING to pre-copy, the driver should start + * gathering the device state while the application is still running and send + * the device state data to application through the migration region. + * On state transition from pre-copy to stop-and-copy, the driver must stop + * the device, save the device state and send it to the user application + * through the migration region. + * Vendor drivers must support the pre-copy state even for implementations + * where no data is provided to the user before the stop-and-copy state. The + * user must not be required to consume all migration data before the device + * transitions to a new state, including the stop-and-copy state. + * The sequence to be followed for above two transitions is given below. + * 4. To start the resuming phase, the device state should be transitioned from + * the _RUNNING to the _RESUMING state. + * In the _RESUMING state, the driver should use the device state data + * received through the migration region to resume the device. + * 5. After providing saved device data to the driver, the application should + * change the state from _RESUMING to _RUNNING. + * + * reserved: + * Reads on this field return zero and writes are ignored. + * + * pending_bytes: (read only) + * The number of pending bytes still to be migrated from the vendor driver. + * + * data_offset: (read only) + * The user application should read data_offset field from the migration + * region. The user application should read the device data from this + * offset within the migration region during the _SAVING state or write + * the device data during the _RESUMING state. See below for details of + * sequence to be followed. + * + * data_size: (read/write) + * The user application should read data_size to get the size in bytes of + * the data copied in the migration region during the _SAVING state and + * write the size in bytes of the data copied in the migration region + * during the _RESUMING state. + * + * The format of the migration region is as follows: + * ------------------------------------------------------------------ + * |vfio_device_migration_info| data section | + * | | /////////////////////////////// | + * ------------------------------------------------------------------ + * ^ ^ + * offset 0-trapped part data_offset + * + * The structure vfio_device_migration_info is always followed by the data + * section in the region, so data_offset will always be nonzero. The offset + * from where the data is copied is decided by the kernel driver. The data + * section can be trapped, mmapped, or partitioned, depending on how the kernel + * driver defines the data section. The data section partition can be defined + * as mapped by the sparse mmap capability. If mmapped, data_offset must be + * page aligned, whereas initial section which contains the + * vfio_device_migration_info structure, might not end at the offset, which is + * page aligned. The user is not required to access through mmap regardless + * of the capabilities of the region mmap. + * The vendor driver should determine whether and how to partition the data + * section. The vendor driver should return data_offset accordingly. + * + * The sequence to be followed while in pre-copy state and stop-and-copy state + * is as follows: + * a. Read pending_bytes, indicating the start of a new iteration to get device + * data. Repeated read on pending_bytes at this stage should have no side + * effects. + * If pending_bytes == 0, the user application should not iterate to get data + * for that device. + * If pending_bytes > 0, perform the following steps. + * b. Read data_offset, indicating that the vendor driver should make data + * available through the data section. The vendor driver should return this + * read operation only after data is available from (region + data_offset) + * to (region + data_offset + data_size). + * c. Read data_size, which is the amount of data in bytes available through + * the migration region. + * Read on data_offset and data_size should return the offset and size of + * the current buffer if the user application reads data_offset and + * data_size more than once here. + * d. Read data_size bytes of data from (region + data_offset) from the + * migration region. + * e. Process the data. + * f. Read pending_bytes, which indicates that the data from the previous + * iteration has been read. If pending_bytes > 0, go to step b. + * + * The user application can transition from the _SAVING|_RUNNING + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the + * number of pending bytes. The user application should iterate in _SAVING + * (stop-and-copy) until pending_bytes is 0. + * + * The sequence to be followed while _RESUMING device state is as follows: + * While data for this device is available, repeat the following steps: + * a. Read data_offset from where the user application should write data. + * b. Write migration data starting at the migration region + data_offset for + * the length determined by data_size from the migration source. + * c. Write data_size, which indicates to the vendor driver that data is + * written in the migration region. Vendor driver must return this write + * operations on consuming data. Vendor driver should apply the + * user-provided migration region data to the device resume state. + * + * If an error occurs during the above sequences, the vendor driver can return + * an error code for next read() or write() operation, which will terminate the + * loop. The user application should then take the next necessary action, for + * example, failing migration or terminating the user application. + * + * For the user application, data is opaque. The user application should write + * data in the same order as the data is received and the data should be of + * same transaction size at the source. + */ + +struct vfio_device_migration_info { + __u32 device_state; /* VFIO device state */ +#define VFIO_DEVICE_STATE_STOP (0) +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ + VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + +#define VFIO_DEVICE_STATE_VALID(state) \ + (state & VFIO_DEVICE_STATE_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + +#define VFIO_DEVICE_STATE_IS_ERROR(state) \ + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING)) + +#define VFIO_DEVICE_STATE_SET_ERROR(state) \ + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + + __u32 reserved; + __u64 pending_bytes; + __u64 data_offset; + __u64 data_size; +}; + /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped * which allows direct access to non-MSIX registers which happened to be within -- cgit v1.2.3 From b704fd14a06f678f080e44db28061f1bcb1f595c Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:50 +0530 Subject: vfio iommu: Add ioctl definition for dirty pages tracking IOMMU container maintains a list of all pages pinned by vfio_pin_pages API. All pages pinned by vendor driver through this API should be considered as dirty during migration. When container consists of IOMMU capable device and all pages are pinned and mapped, then all pages are marked dirty. Added support to start/stop dirtied pages tracking and to get bitmap of all dirtied pages for requested IO virtual address range. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ad9bb5af3463..009a8c80079d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1033,6 +1033,12 @@ struct vfio_iommu_type1_dma_map { #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) +struct vfio_bitmap { + __u64 pgsize; /* page size for bitmap in bytes */ + __u64 size; /* in bytes */ + __u64 __user *data; /* one bit per page */ +}; + /** * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, * struct vfio_dma_unmap) @@ -1059,6 +1065,57 @@ struct vfio_iommu_type1_dma_unmap { #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) +/** + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_iommu_type1_dirty_bitmap) + * IOCTL is used for dirty pages logging. + * Caller should set flag depending on which operation to perform, details as + * below: + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs + * the IOMMU driver to log pages that are dirtied or potentially dirtied by + * the device; designed to be used when a migration is in progress. Dirty pages + * are logged until logging is disabled by user application by calling the IOCTL + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs + * the IOMMU driver to stop logging dirtied pages. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. + * The user must specify the IOVA range and the pgsize through the structure + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface + * supports getting a bitmap of the smallest supported pgsize only and can be + * modified in future to get a bitmap of any specified supported pgsize. The + * user must provide a zeroed memory area for the bitmap memory and specify its + * size in bitmap.size. One bit is used to represent one page consecutively + * starting from iova offset. The user should provide page size in bitmap.pgsize + * field. A bit set in the bitmap indicates that the page at that offset from + * iova is dirty. The caller must set argsz to a value including the size of + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * + * Only one of the flags _START, _STOP and _GET may be specified at a time. + * + */ +struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) + __u8 data[]; +}; + +struct vfio_iommu_type1_dirty_bitmap_get { + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of iova range */ + struct vfio_bitmap bitmap; +}; + +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* -- cgit v1.2.3 From 331e33d2960c8292bad8b02578fcfac18f721517 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:52 +0530 Subject: vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap DMA mapped pages, including those pinned by mdev vendor drivers, might get unpinned and unmapped while migration is active and device is still running. For example, in pre-copy phase while guest driver could access those pages, host device or vendor driver can dirty these mapped pages. Such pages should be marked dirty so as to maintain memory consistency for a user making use of dirty page tracking. To get bitmap during unmap, user should allocate memory for bitmap, set it all zeros, set size of allocated memory, set page size to be considered for bitmap and set flag VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 009a8c80079d..ff4b6706f7df 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1048,12 +1048,23 @@ struct vfio_bitmap { * field. No guarantee is made to the user that arbitrary unmaps of iova * or size different from those used in the original mapping call will * succeed. + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. + * A bit in the bitmap represents one page, of user provided page size in + * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set + * indicates that the page at that offset from iova is dirty. A Bitmap of the + * pages in the range of unmapped size is returned in the user-provided + * vfio_bitmap.data. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; __u32 flags; +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; }; #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) -- cgit v1.2.3 From ad721705d09c62f0d108a6b4f59867ebfd592c90 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:53 +0530 Subject: vfio iommu: Add migration capability to report supported features Added migration capability in IOMMU info chain. User application should check IOMMU info chain for migration capability to use dirty page tracking feature provided by kernel module. User application must check page sizes supported and maximum dirty bitmap size returned by this capability structure for ioctls used to get dirty bitmap. Signed-off-by: Kirti Wankhede Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ff4b6706f7df..fde4692a6989 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1013,6 +1013,29 @@ struct vfio_iommu_type1_info_cap_iova_range { struct vfio_iova_range iova_ranges[]; }; +/* + * The migration capability allows to report supported features for migration. + * + * The structures below define version 1 of this capability. + * + * The existence of this capability indicates that IOMMU kernel driver supports + * dirty page logging. + * + * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty + * page logging. + * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap + * size in bytes that can be used by user applications when getting the dirty + * bitmap. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 + +struct vfio_iommu_type1_info_cap_migration { + struct vfio_info_cap_header header; + __u32 flags; + __u64 pgsize_bitmap; + __u64 max_dirty_bitmap_size; /* in bytes */ +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** -- cgit v1.2.3 From b383a73f2b832491a2f9e6e8ada26aad53b5763d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 May 2020 08:00:02 -0700 Subject: fs/ext4: Introduce DAX inode flag Add a flag ([EXT4|FS]_DAX_FL) to preserve FS_XFLAG_DAX in the ext4 inode. Set the flag to be user visible and changeable. Set the flag to be inherited. Allow applications to change the flag at any time except if it conflicts with the set of mutually exclusive flags (Currently VERITY, ENCRYPT, JOURNAL_DATA). Furthermore, restrict setting any of the exclusive flags if DAX is set. While conceptually possible, we do not allow setting EXT4_DAX_FL while at the same time clearing exclusion flags (or vice versa) for 2 reasons: 1) The DAX flag does not take effect immediately which introduces quite a bit of complexity 2) There is no clear use case for being this flexible Finally, on regular files, flag the inode to not be cached to facilitate changing S_DAX on the next creation of the inode. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20200528150003.828793-9-ira.weiny@intel.com Signed-off-by: Theodore Ts'o --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 379a612f8f1d..f44eb0a04afd 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -262,6 +262,7 @@ struct fsxattr { #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ -- cgit v1.2.3 From 43e64bf301fd8c54f0082d91c6ffd4de861baf96 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Thu, 28 May 2020 21:34:29 +0200 Subject: cfg80211: handle 6 GHz capability of new station Handle 6 GHz HE capability while adding new station. It will be used later in mac80211 station processing. Signed-off-by: Rajkumar Manoharan Link: https://lore.kernel.org/r/1589399105-25472-2-git-send-email-rmanohar@codeaurora.org [handle nl80211_set_station, require WME, remove NL80211_HE_6GHZ_CAPABILITY_LEN] Link: https://lore.kernel.org/r/20200528213443.b6b711fd4312.Ic9b97d57b6c4f2b28d4b2d23d2849d8bc20bd8cc@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c14666b75e57..e42ae429383e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2502,6 +2502,9 @@ enum nl80211_commands { * first channel segment specified in %NL80211_ATTR_CENTER_FREQ1. * @NL80211_ATTR_SCAN_FREQ_KHZ: nested attribute with KHz frequencies * + * @NL80211_ATTR_HE_6GHZ_CAPABILITY: HE 6 GHz Band Capability element (from + * association request when used with NL80211_CMD_NEW_STATION). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2982,6 +2985,8 @@ enum nl80211_attrs { NL80211_ATTR_CENTER_FREQ1_OFFSET, NL80211_ATTR_SCAN_FREQ_KHZ, + NL80211_ATTR_HE_6GHZ_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 223952177296c34d9c8de9cde33204caffe55725 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 May 2020 21:34:31 +0200 Subject: cfg80211: add and expose HE 6 GHz band capabilities These capabilities cover what would otherwise be transported in HT/VHT capabilities, but only a subset thereof that is actually needed on 6 GHz with HE already present. Expose the capabilities to userspace, drivers are expected to set them as using the 6 GHz band (currently) requires HE capability. Link: https://lore.kernel.org/r/20200528213443.244cd5cb9db8.Icd8c773277a88c837e7e3af1d4d1013cc3b66543@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e42ae429383e..5b350d032fa3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3565,6 +3565,8 @@ enum nl80211_mpath_info { * defined in HE capabilities IE * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently * defined + * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), + * given for all 6 GHz band channels * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use */ enum nl80211_band_iftype_attr { @@ -3575,6 +3577,7 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, + NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, /* keep last */ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, -- cgit v1.2.3 From 093a48d2aa4b74db3134b61d7b7a061dbe79177b Mon Sep 17 00:00:00 2001 From: Nathan Errera Date: Thu, 28 May 2020 21:22:38 +0200 Subject: cfg80211: support bigger kek/kck key length With some newer AKMs, the KCK and KEK are bigger, so allow that if the driver advertises support for it. In addition, add a new attribute for the AKM so we can use it for offloaded rekeying. Signed-off-by: Nathan Errera [reword commit message] Link: https://lore.kernel.org/r/20200528212237.5eb58b00a5d1.I61b09d77c4f382e8d58a05dcca78096e99a6bc15@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5b350d032fa3..dad8c8f8581f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5396,6 +5396,8 @@ enum plink_actions { #define NL80211_KCK_LEN 16 #define NL80211_KEK_LEN 16 +#define NL80211_KCK_EXT_LEN 24 +#define NL80211_KEK_EXT_LEN 32 #define NL80211_REPLAY_CTR_LEN 8 /** @@ -5404,6 +5406,7 @@ enum plink_actions { * @NL80211_REKEY_DATA_KEK: key encryption key (binary) * @NL80211_REKEY_DATA_KCK: key confirmation key (binary) * @NL80211_REKEY_DATA_REPLAY_CTR: replay counter (binary) + * @NL80211_REKEY_DATA_AKM: AKM data (OUI, suite type) * @NUM_NL80211_REKEY_DATA: number of rekey attributes (internal) * @MAX_NL80211_REKEY_DATA: highest rekey attribute (internal) */ @@ -5412,6 +5415,7 @@ enum nl80211_rekey_data { NL80211_REKEY_DATA_KEK, NL80211_REKEY_DATA_KCK, NL80211_REKEY_DATA_REPLAY_CTR, + NL80211_REKEY_DATA_AKM, /* keep last */ NUM_NL80211_REKEY_DATA, -- cgit v1.2.3 From 72de5fa4c16195827252b961ba44028a39dfeaff Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 25 May 2020 16:41:22 +0200 Subject: KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT Introduce new capability to indicate that KVM supports interrupt based delivery of 'page ready' APF events. This includes support for both MSR_KVM_ASYNC_PF_INT and MSR_KVM_ASYNC_PF_ACK. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200525144125.143875-8-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ac9eba0289d1..fc0075988b80 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1018,6 +1018,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED 180 #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 +#define KVM_CAP_ASYNC_PF_INT 183 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f7d31e65368aeef973fab788aa22c4f1d5a6af66 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 24 Apr 2020 14:37:40 +0300 Subject: x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit The problem the patch is trying to address is the fact that 'struct kvm_hyperv_exit' has different layout on when compiling in 32 and 64 bit modes. In 64-bit mode the default alignment boundary is 64 bits thus forcing extra gaps after 'type' and 'msr' but in 32-bit mode the boundary is at 32 bits thus no extra gaps. This is an issue as even when the kernel is 64 bit, the userspace using the interface can be both 32 and 64 bit but the same 32 bit userspace has to work with 32 bit kernel. The issue is fixed by forcing the 64 bit layout, this leads to ABI change for 32 bit builds and while we are obviously breaking '32 bit userspace with 32 bit kernel' case, we're fixing the '32 bit userspace with 64 bit kernel' one. As the interface has no (known) users and 32 bit KVM is rather baroque nowadays, this seems like a reasonable decision. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Jon Doron Message-Id: <20200424113746.3473563-2-arilou@gmail.com> Reviewed-by: Roman Kagan Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index fc0075988b80..cc04aceb8793 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -189,9 +189,11 @@ struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 __u32 type; + __u32 pad1; union { struct { __u32 msr; + __u32 pad2; __u64 control; __u64 evt_page; __u64 msg_page; -- cgit v1.2.3 From f97f5a56f5977311f3833056a73cdbb0ee56cb1e Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 29 May 2020 16:45:40 +0300 Subject: x86/kvm/hyper-v: Add support for synthetic debugger interface Add support for Hyper-V synthetic debugger (syndbg) interface. The syndbg interface is using MSRs to emulate a way to send/recv packets data. The debug transport dll (kdvm/kdnet) will identify if Hyper-V is enabled and if it supports the synthetic debugger interface it will attempt to use it, instead of trying to initialize a network adapter. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Jon Doron Message-Id: <20200529134543.1127440-4-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cc04aceb8793..6721eb563eda 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -188,6 +188,7 @@ struct kvm_s390_cmma_log { struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 +#define KVM_EXIT_HYPERV_SYNDBG 3 __u32 type; __u32 pad1; union { @@ -203,6 +204,15 @@ struct kvm_hyperv_exit { __u64 result; __u64 params[2]; } hcall; + struct { + __u32 msr; + __u32 pad2; + __u64 control; + __u64 status; + __u64 send_page; + __u64 recv_page; + __u64 pending_page; + } syndbg; } u; }; -- cgit v1.2.3 From 9eefeabed6f831018c15bd7e17d34967ee34d9dd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 29 May 2020 21:36:39 +0300 Subject: devlink: Add 'mirror' trap action The action is used by control traps such as IGMP query. The packet is flooded by the device, but also trapped to the CPU in order for the software bridge to mark the receiving port as a multicast router port. Such packets are marked with 'skb->offload_fwd_mark = 1' in order to prevent the software bridge from flooding them again. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1ae90e06c06d..16305932a950 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -233,10 +233,13 @@ enum { * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not * sent to the CPU. * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU. + * @DEVLINK_TRAP_ACTION_MIRROR: Packet is forwarded by the device and a copy is + * sent to the CPU. */ enum devlink_trap_action { DEVLINK_TRAP_ACTION_DROP, DEVLINK_TRAP_ACTION_TRAP, + DEVLINK_TRAP_ACTION_MIRROR, }; /** -- cgit v1.2.3 From 30a4e9a29ab9aadfe6c5386ae4aa396b1d2556c2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 29 May 2020 21:36:40 +0300 Subject: devlink: Add 'control' trap type This type is used for traps that trap control packets such as ARP request and IGMP query to the CPU. Do not report such packets to the kernel's drop monitor as they were not dropped by the device no encountered an exception during forwarding. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 16305932a950..08563e6a424d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -253,10 +253,16 @@ enum devlink_trap_action { * control plane for resolution. Trapped packets * are processed by devlink and injected to * the kernel's Rx path. + * @DEVLINK_TRAP_TYPE_CONTROL: Packet was trapped because it is required for + * the correct functioning of the control plane. + * For example, an ARP request packet. Trapped + * packets are injected to the kernel's Rx path, + * but not reported to drop monitor. */ enum devlink_trap_type { DEVLINK_TRAP_TYPE_DROP, DEVLINK_TRAP_TYPE_EXCEPTION, + DEVLINK_TRAP_TYPE_CONTROL, }; enum { -- cgit v1.2.3 From 7e89ed8ab3f74e0746d3ea80537d7a06b0e27732 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 30 May 2020 18:09:46 +0000 Subject: bridge: mrp: Update MRP frame type Replace u16/u32 with be16/be32 in the MRP frame types. This fixes sparse warnings like: warning: cast to restricted __be16 Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/mrp_bridge.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index 2600cdf5a284..bcad42128d62 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -55,30 +55,30 @@ struct br_mrp_end_hdr { }; struct br_mrp_common_hdr { - __u16 seq_id; + __be16 seq_id; __u8 domain[MRP_DOMAIN_UUID_LENGTH]; }; struct br_mrp_ring_test_hdr { - __u16 prio; + __be16 prio; __u8 sa[ETH_ALEN]; - __u16 port_role; - __u16 state; - __u16 transitions; - __u32 timestamp; + __be16 port_role; + __be16 state; + __be16 transitions; + __be32 timestamp; }; struct br_mrp_ring_topo_hdr { - __u16 prio; + __be16 prio; __u8 sa[ETH_ALEN]; - __u16 interval; + __be16 interval; }; struct br_mrp_ring_link_hdr { __u8 sa[ETH_ALEN]; - __u16 port_role; - __u16 interval; - __u16 blocked; + __be16 port_role; + __be16 interval; + __be16 blocked; }; #endif -- cgit v1.2.3 From 4b3a61b030d1131dcf3633a276158a3d0a435a47 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 30 May 2020 18:09:47 +0000 Subject: bridge: mrp: Set the priority of MRP instance Each MRP instance has a priority, a lower value means a higher priority. The priority of MRP instance is stored in MRP_Test frame in this way all the MRP nodes in the ring can see other nodes priority. Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5a43eb86c93b..0162c1370ecb 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -176,6 +176,7 @@ enum { IFLA_BRIDGE_MRP_INSTANCE_RING_ID, IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX, IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX, + IFLA_BRIDGE_MRP_INSTANCE_PRIO, __IFLA_BRIDGE_MRP_INSTANCE_MAX, }; @@ -230,6 +231,7 @@ struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; __u32 s_ifindex; + __u16 prio; }; struct br_mrp_ring_state { -- cgit v1.2.3 From c6676e7d62cfb5cb7c1c5320a26f3634a11afdb0 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 30 May 2020 18:09:48 +0000 Subject: bridge: mrp: Add support for role MRA A node that has the MRA role, it can behave as MRM or MRC. Initially it starts as MRM and sends MRP_Test frames on both ring ports. If it detects that there are MRP_Test send by another MRM, then it checks if these frames have a lower priority than itself. In this case it would send MRP_Nack frames to notify the other node that it needs to stop sending MRP_Test frames. If it receives a MRP_Nack frame then it stops sending MRP_Test frames and starts to behave as a MRC but it would continue to monitor the MRP_Test frames send by MRM. If at a point the MRM stops to send MRP_Test frames it would get the MRM role and start to send MRP_Test frames. Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ include/uapi/linux/mrp_bridge.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 0162c1370ecb..caa6914a3e53 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -222,6 +222,7 @@ enum { IFLA_BRIDGE_MRP_START_TEST_INTERVAL, IFLA_BRIDGE_MRP_START_TEST_MAX_MISS, IFLA_BRIDGE_MRP_START_TEST_PERIOD, + IFLA_BRIDGE_MRP_START_TEST_MONITOR, __IFLA_BRIDGE_MRP_START_TEST_MAX, }; @@ -249,6 +250,7 @@ struct br_mrp_start_test { __u32 interval; __u32 max_miss; __u32 period; + __u32 monitor; }; struct bridge_stp_xstats { diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index bcad42128d62..84f15f48a7cb 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -11,11 +11,14 @@ #define MRP_DOMAIN_UUID_LENGTH 16 #define MRP_VERSION 1 #define MRP_FRAME_PRIO 7 +#define MRP_OUI_LENGTH 3 +#define MRP_MANUFACTURE_DATA_LENGTH 2 enum br_mrp_ring_role_type { BR_MRP_RING_ROLE_DISABLED, BR_MRP_RING_ROLE_MRC, BR_MRP_RING_ROLE_MRM, + BR_MRP_RING_ROLE_MRA, }; enum br_mrp_ring_state_type { @@ -43,6 +46,13 @@ enum br_mrp_tlv_header_type { BR_MRP_TLV_HEADER_RING_TOPO = 0x3, BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4, BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5, + BR_MRP_TLV_HEADER_OPTION = 0x7f, +}; + +enum br_mrp_sub_tlv_header_type { + BR_MRP_SUB_TLV_HEADER_TEST_MGR_NACK = 0x1, + BR_MRP_SUB_TLV_HEADER_TEST_PROPAGATE = 0x2, + BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR = 0x3, }; struct br_mrp_tlv_hdr { @@ -50,6 +60,11 @@ struct br_mrp_tlv_hdr { __u8 length; }; +struct br_mrp_sub_tlv_hdr { + __u8 type; + __u8 length; +}; + struct br_mrp_end_hdr { struct br_mrp_tlv_hdr hdr; }; @@ -81,4 +96,27 @@ struct br_mrp_ring_link_hdr { __be16 blocked; }; +struct br_mrp_sub_opt_hdr { + __u8 type; + __u8 manufacture_data[MRP_MANUFACTURE_DATA_LENGTH]; +}; + +struct br_mrp_test_mgr_nack_hdr { + __be16 prio; + __u8 sa[ETH_ALEN]; + __be16 other_prio; + __u8 other_sa[ETH_ALEN]; +}; + +struct br_mrp_test_prop_hdr { + __be16 prio; + __u8 sa[ETH_ALEN]; + __be16 other_prio; + __u8 other_sa[ETH_ALEN]; +}; + +struct br_mrp_oui_hdr { + __u8 oui[MRP_OUI_LENGTH]; +}; + #endif -- cgit v1.2.3 From 13d70f5a5ecff367db2fb18ed4ebe433eab8a74c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 24 May 2020 09:51:15 -0700 Subject: bpf, sk_msg: Add get socket storage helpers Add helpers to use local socket storage. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/159033907577.12355.14740125020572756560.stgit@john-Precision-5820-Tower Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 97e1fd19ff58..54b93f8b49b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3645,6 +3645,8 @@ struct sk_msg_md { __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ __u32 size; /* Total size of sk_msg */ + + __bpf_md_ptr(struct bpf_sock *, sk); /* current socket */ }; struct sk_reuseport_md { -- cgit v1.2.3 From 457f44363a8894135c85b7a9afd2bd8196db24ab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 29 May 2020 00:54:20 -0700 Subject: bpf: Implement BPF ring buffer and verifier support for it This commit adds a new MPSC ring buffer implementation into BPF ecosystem, which allows multiple CPUs to submit data to a single shared ring buffer. On the consumption side, only single consumer is assumed. Motivation ---------- There are two distinctive motivators for this work, which are not satisfied by existing perf buffer, which prompted creation of a new ring buffer implementation. - more efficient memory utilization by sharing ring buffer across CPUs; - preserving ordering of events that happen sequentially in time, even across multiple CPUs (e.g., fork/exec/exit events for a task). These two problems are independent, but perf buffer fails to satisfy both. Both are a result of a choice to have per-CPU perf ring buffer. Both can be also solved by having an MPSC implementation of ring buffer. The ordering problem could technically be solved for perf buffer with some in-kernel counting, but given the first one requires an MPSC buffer, the same solution would solve the second problem automatically. Semantics and APIs ------------------ Single ring buffer is presented to BPF programs as an instance of BPF map of type BPF_MAP_TYPE_RINGBUF. Two other alternatives considered, but ultimately rejected. One way would be to, similar to BPF_MAP_TYPE_PERF_EVENT_ARRAY, make BPF_MAP_TYPE_RINGBUF could represent an array of ring buffers, but not enforce "same CPU only" rule. This would be more familiar interface compatible with existing perf buffer use in BPF, but would fail if application needed more advanced logic to lookup ring buffer by arbitrary key. HASH_OF_MAPS addresses this with current approach. Additionally, given the performance of BPF ringbuf, many use cases would just opt into a simple single ring buffer shared among all CPUs, for which current approach would be an overkill. Another approach could introduce a new concept, alongside BPF map, to represent generic "container" object, which doesn't necessarily have key/value interface with lookup/update/delete operations. This approach would add a lot of extra infrastructure that has to be built for observability and verifier support. It would also add another concept that BPF developers would have to familiarize themselves with, new syntax in libbpf, etc. But then would really provide no additional benefits over the approach of using a map. BPF_MAP_TYPE_RINGBUF doesn't support lookup/update/delete operations, but so doesn't few other map types (e.g., queue and stack; array doesn't support delete, etc). The approach chosen has an advantage of re-using existing BPF map infrastructure (introspection APIs in kernel, libbpf support, etc), being familiar concept (no need to teach users a new type of object in BPF program), and utilizing existing tooling (bpftool). For common scenario of using a single ring buffer for all CPUs, it's as simple and straightforward, as would be with a dedicated "container" object. On the other hand, by being a map, it can be combined with ARRAY_OF_MAPS and HASH_OF_MAPS map-in-maps to implement a wide variety of topologies, from one ring buffer for each CPU (e.g., as a replacement for perf buffer use cases), to a complicated application hashing/sharding of ring buffers (e.g., having a small pool of ring buffers with hashed task's tgid being a look up key to preserve order, but reduce contention). Key and value sizes are enforced to be zero. max_entries is used to specify the size of ring buffer and has to be a power of 2 value. There are a bunch of similarities between perf buffer (BPF_MAP_TYPE_PERF_EVENT_ARRAY) and new BPF ring buffer semantics: - variable-length records; - if there is no more space left in ring buffer, reservation fails, no blocking; - memory-mappable data area for user-space applications for ease of consumption and high performance; - epoll notifications for new incoming data; - but still the ability to do busy polling for new data to achieve the lowest latency, if necessary. BPF ringbuf provides two sets of APIs to BPF programs: - bpf_ringbuf_output() allows to *copy* data from one place to a ring buffer, similarly to bpf_perf_event_output(); - bpf_ringbuf_reserve()/bpf_ringbuf_commit()/bpf_ringbuf_discard() APIs split the whole process into two steps. First, a fixed amount of space is reserved. If successful, a pointer to a data inside ring buffer data area is returned, which BPF programs can use similarly to a data inside array/hash maps. Once ready, this piece of memory is either committed or discarded. Discard is similar to commit, but makes consumer ignore the record. bpf_ringbuf_output() has disadvantage of incurring extra memory copy, because record has to be prepared in some other place first. But it allows to submit records of the length that's not known to verifier beforehand. It also closely matches bpf_perf_event_output(), so will simplify migration significantly. bpf_ringbuf_reserve() avoids the extra copy of memory by providing a memory pointer directly to ring buffer memory. In a lot of cases records are larger than BPF stack space allows, so many programs have use extra per-CPU array as a temporary heap for preparing sample. bpf_ringbuf_reserve() avoid this needs completely. But in exchange, it only allows a known constant size of memory to be reserved, such that verifier can verify that BPF program can't access memory outside its reserved record space. bpf_ringbuf_output(), while slightly slower due to extra memory copy, covers some use cases that are not suitable for bpf_ringbuf_reserve(). The difference between commit and discard is very small. Discard just marks a record as discarded, and such records are supposed to be ignored by consumer code. Discard is useful for some advanced use-cases, such as ensuring all-or-nothing multi-record submission, or emulating temporary malloc()/free() within single BPF program invocation. Each reserved record is tracked by verifier through existing reference-tracking logic, similar to socket ref-tracking. It is thus impossible to reserve a record, but forget to submit (or discard) it. bpf_ringbuf_query() helper allows to query various properties of ring buffer. Currently 4 are supported: - BPF_RB_AVAIL_DATA returns amount of unconsumed data in ring buffer; - BPF_RB_RING_SIZE returns the size of ring buffer; - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical possition of consumer/producer, respectively. Returned values are momentarily snapshots of ring buffer state and could be off by the time helper returns, so this should be used only for debugging/reporting reasons or for implementing various heuristics, that take into account highly-changeable nature of some of those characteristics. One such heuristic might involve more fine-grained control over poll/epoll notifications about new data availability in ring buffer. Together with BPF_RB_NO_WAKEUP/BPF_RB_FORCE_WAKEUP flags for output/commit/discard helpers, it allows BPF program a high degree of control and, e.g., more efficient batched notifications. Default self-balancing strategy, though, should be adequate for most applications and will work reliable and efficiently already. Design and implementation ------------------------- This reserve/commit schema allows a natural way for multiple producers, either on different CPUs or even on the same CPU/in the same BPF program, to reserve independent records and work with them without blocking other producers. This means that if BPF program was interruped by another BPF program sharing the same ring buffer, they will both get a record reserved (provided there is enough space left) and can work with it and submit it independently. This applies to NMI context as well, except that due to using a spinlock during reservation, in NMI context, bpf_ringbuf_reserve() might fail to get a lock, in which case reservation will fail even if ring buffer is not full. The ring buffer itself internally is implemented as a power-of-2 sized circular buffer, with two logical and ever-increasing counters (which might wrap around on 32-bit architectures, that's not a problem): - consumer counter shows up to which logical position consumer consumed the data; - producer counter denotes amount of data reserved by all producers. Each time a record is reserved, producer that "owns" the record will successfully advance producer counter. At that point, data is still not yet ready to be consumed, though. Each record has 8 byte header, which contains the length of reserved record, as well as two extra bits: busy bit to denote that record is still being worked on, and discard bit, which might be set at commit time if record is discarded. In the latter case, consumer is supposed to skip the record and move on to the next one. Record header also encodes record's relative offset from the beginning of ring buffer data area (in pages). This allows bpf_ringbuf_commit()/bpf_ringbuf_discard() to accept only the pointer to the record itself, without requiring also the pointer to ring buffer itself. Ring buffer memory location will be restored from record metadata header. This significantly simplifies verifier, as well as improving API usability. Producer counter increments are serialized under spinlock, so there is a strict ordering between reservations. Commits, on the other hand, are completely lockless and independent. All records become available to consumer in the order of reservations, but only after all previous records where already committed. It is thus possible for slow producers to temporarily hold off submitted records, that were reserved later. Reservation/commit/consumer protocol is verified by litmus tests in Documentation/litmus-test/bpf-rb. One interesting implementation bit, that significantly simplifies (and thus speeds up as well) implementation of both producers and consumers is how data area is mapped twice contiguously back-to-back in the virtual memory. This allows to not take any special measures for samples that have to wrap around at the end of the circular buffer data area, because the next page after the last data page would be first data page again, and thus the sample will still appear completely contiguous in virtual memory. See comment and a simple ASCII diagram showing this visually in bpf_ringbuf_area_alloc(). Another feature that distinguishes BPF ringbuf from perf ring buffer is a self-pacing notifications of new data being availability. bpf_ringbuf_commit() implementation will send a notification of new record being available after commit only if consumer has already caught up right up to the record being committed. If not, consumer still has to catch up and thus will see new data anyways without needing an extra poll notification. Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c) show that this allows to achieve a very high throughput without having to resort to tricks like "notify only every Nth sample", which are necessary with perf buffer. For extreme cases, when BPF program wants more manual control of notifications, commit/discard/output helpers accept BPF_RB_NO_WAKEUP and BPF_RB_FORCE_WAKEUP flags, which give full control over notifications of data availability, but require extra caution and diligence in using this API. Comparison to alternatives -------------------------- Before considering implementing BPF ring buffer from scratch existing alternatives in kernel were evaluated, but didn't seem to meet the needs. They largely fell into few categores: - per-CPU buffers (perf, ftrace, etc), which don't satisfy two motivations outlined above (ordering and memory consumption); - linked list-based implementations; while some were multi-producer designs, consuming these from user-space would be very complicated and most probably not performant; memory-mapping contiguous piece of memory is simpler and more performant for user-space consumers; - io_uring is SPSC, but also requires fixed-sized elements. Naively turning SPSC queue into MPSC w/ lock would have subpar performance compared to locked reserve + lockless commit, as with BPF ring buffer. Fixed sized elements would be too limiting for BPF programs, given existing BPF programs heavily rely on variable-sized perf buffer already; - specialized implementations (like a new printk ring buffer, [0]) with lots of printk-specific limitations and implications, that didn't seem to fit well for intended use with BPF programs. [0] https://lwn.net/Articles/779550/ Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200529075424.3139988-2-andriin@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54b93f8b49b8..974ca6e948e3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -147,6 +147,7 @@ enum bpf_map_type { BPF_MAP_TYPE_SK_STORAGE, BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, + BPF_MAP_TYPE_RINGBUF, }; /* Note that tracing related programs such as @@ -3157,6 +3158,59 @@ union bpf_attr { * **bpf_sk_cgroup_id**\ (). * Return * The id is returned or 0 in case the id could not be retrieved. + * + * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * Description + * Copy *size* bytes from *data* into a ring buffer *ringbuf*. + * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of + * new data availability is sent. + * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of + * new data availability is sent unconditionally. + * Return + * 0, on success; + * < 0, on error. + * + * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf*. + * Return + * Valid pointer with *size* bytes of memory available; NULL, + * otherwise. + * + * void bpf_ringbuf_submit(void *data, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*. + * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of + * new data availability is sent. + * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of + * new data availability is sent unconditionally. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard(void *data, u64 flags) + * Description + * Discard reserved ring buffer sample, pointed to by *data*. + * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of + * new data availability is sent. + * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of + * new data availability is sent unconditionally. + * Return + * Nothing. Always succeeds. + * + * u64 bpf_ringbuf_query(void *ringbuf, u64 flags) + * Description + * Query various characteristics of provided ring buffer. What + * exactly is queries is determined by *flags*: + * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; + * - BPF_RB_RING_SIZE - the size of ring buffer; + * - BPF_RB_CONS_POS - consumer position (can wrap around); + * - BPF_RB_PROD_POS - producer(s) position (can wrap around); + * Data returned is just a momentary snapshots of actual values + * and could be inaccurate, so this facility should be used to + * power heuristics and for reporting, not to make 100% correct + * calculation. + * Return + * Requested value, or 0, if flags are not recognized. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3288,7 +3342,12 @@ union bpf_attr { FN(seq_printf), \ FN(seq_write), \ FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), + FN(sk_ancestor_cgroup_id), \ + FN(ringbuf_output), \ + FN(ringbuf_reserve), \ + FN(ringbuf_submit), \ + FN(ringbuf_discard), \ + FN(ringbuf_query), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3398,6 +3457,29 @@ enum { BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), }; +/* BPF_FUNC_bpf_ringbuf_commit, BPF_FUNC_bpf_ringbuf_discard, and + * BPF_FUNC_bpf_ringbuf_output flags. + */ +enum { + BPF_RB_NO_WAKEUP = (1ULL << 0), + BPF_RB_FORCE_WAKEUP = (1ULL << 1), +}; + +/* BPF_FUNC_bpf_ringbuf_query flags */ +enum { + BPF_RB_AVAIL_DATA = 0, + BPF_RB_RING_SIZE = 1, + BPF_RB_CONS_POS = 2, + BPF_RB_PROD_POS = 3, +}; + +/* BPF ring buffer constants */ +enum { + BPF_RINGBUF_BUSY_BIT = (1U << 31), + BPF_RINGBUF_DISCARD_BIT = (1U << 30), + BPF_RINGBUF_HDR_SZ = 8, +}; + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, -- cgit v1.2.3 From c3c16f2ea6d20159903cf93afbb1155f3d8348d5 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Tue, 26 May 2020 17:34:36 -0700 Subject: bpf: Add rx_queue_mapping to bpf_sock Add "rx_queue_mapping" to bpf_sock. This gives read access for the existing field (sk_rx_queue_mapping) of struct sock from bpf_sock. Semantics for the bpf_sock rx_queue_mapping access are similar to sk_rx_queue_get(), i.e the value NO_QUEUE_MAPPING is not allowed and -1 is returned in that case. This is useful for transmit queue selection based on the received queue index which is cached in the socket in the receive path. v3: Addressed review comments to add usecase in patch description, and fixed default value for rx_queue_mapping. v2: fixed build error for CONFIG_XPS wrapping, reported by kbuild test robot Signed-off-by: Amritha Nambiar Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 974ca6e948e3..630432c5c292 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3612,6 +3612,7 @@ struct bpf_sock { __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; + __s32 rx_queue_mapping; }; struct bpf_tcp_sock { -- cgit v1.2.3 From fbee97feed9b3e4acdf9590e1f6b4a2eefecfffe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 29 May 2020 16:07:13 -0600 Subject: bpf: Add support to attach bpf program to a devmap entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add BPF_XDP_DEVMAP attach type for use with programs associated with a DEVMAP entry. Allow DEVMAPs to associate a program with a device entry by adding a bpf_prog.fd to 'struct bpf_devmap_val'. Values read show the program id, so the fd and id are a union. bpf programs can get access to the struct via vmlinux.h. The program associated with the fd must have type XDP with expected attach type BPF_XDP_DEVMAP. When a program is associated with a device index, the program is run on an XDP_REDIRECT and before the buffer is added to the per-cpu queue. At this point rxq data is still valid; the next patch adds tx device information allowing the prorgam to see both ingress and egress device indices. XDP generic is skb based and XDP programs do not work with skb's. Block the use case by walking maps used by a program that is to be attached via xdpgeneric and fail if any of them are DEVMAP / DEVMAP_HASH with Block attach of BPF_XDP_DEVMAP programs to devices. Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200529220716.75383-3-dsahern@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 630432c5c292..f1e364d69007 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -225,6 +225,7 @@ enum bpf_attach_type { BPF_CGROUP_INET6_GETPEERNAME, BPF_CGROUP_INET4_GETSOCKNAME, BPF_CGROUP_INET6_GETSOCKNAME, + BPF_XDP_DEVMAP, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From 64b59025c15b244c0954cf52b24fbabfcf5ed8f6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 29 May 2020 16:07:14 -0600 Subject: xdp: Add xdp_txq_info to xdp_buff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add xdp_txq_info as the Tx counterpart to xdp_rxq_info. At the moment only the device is added. Other fields (queue_index) can be added as use cases arise. >From a UAPI perspective, add egress_ifindex to xdp context for bpf programs to see the Tx device. Update the verifier to only allow accesses to egress_ifindex by XDP programs with BPF_XDP_DEVMAP expected attach type. Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200529220716.75383-4-dsahern@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f1e364d69007..f862a58fb567 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3707,6 +3707,8 @@ struct xdp_md { /* Below access go through struct xdp_rxq_info */ __u32 ingress_ifindex; /* rxq->dev->ifindex */ __u32 rx_queue_index; /* rxq->queue_index */ + + __u32 egress_ifindex; /* txq->dev->ifindex */ }; enum sk_action { -- cgit v1.2.3 From 7f045a49fee04b5662cbdeaf0838f9322ae8c63a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 31 May 2020 10:28:38 +0200 Subject: bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f862a58fb567..b9ed9f14f2a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -237,6 +237,7 @@ enum bpf_link_type { BPF_LINK_TYPE_TRACING = 2, BPF_LINK_TYPE_CGROUP = 3, BPF_LINK_TYPE_ITER = 4, + BPF_LINK_TYPE_NETNS = 5, MAX_BPF_LINK_TYPE, }; @@ -3839,6 +3840,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __u32 netns_ino; + __u32 attach_type; + } netns; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From e7c8cc35a64d1d21e6fb811c519eadbda30f4f77 Mon Sep 17 00:00:00 2001 From: Matej Genci Date: Wed, 11 Sep 2019 12:49:53 +0000 Subject: virtio: add VIRTIO_RING_NO_LEGACY Add macro to disable legacy vring functions. Signed-off-by: Matej Genci Link: https://lore.kernel.org/r/20190911124942.243713-1-matej.genci@nutanix.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 559f42e73315..9223c3a5c46a 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -135,6 +135,8 @@ struct vring { #define VRING_USED_ALIGN_SIZE 4 #define VRING_DESC_ALIGN_SIZE 16 +#ifndef VIRTIO_RING_NO_LEGACY + /* The standard layout for the ring is a continuous chunk of memory which looks * like this. We assume num is a power of 2. * @@ -181,6 +183,8 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; } +#endif /* VIRTIO_RING_NO_LEGACY */ + /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ /* Assuming a given event_idx value from the other side, if * we have just incremented index from old to new_idx, -- cgit v1.2.3 From a865e420b9561235851c3f5d483c82ef389d29bd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 6 Apr 2020 08:42:55 -0400 Subject: virtio: force spec specified alignment on types The ring element addresses are passed between components with different alignments assumptions. Thus, if guest/userspace selects a pointer and host then gets and dereferences it, we might need to decrease the compiler-selected alignment to prevent compiler on the host from assuming pointer is aligned. This actually triggers on ARM with -mabi=apcs-gnu - which is a deprecated configuration, but it seems safer to handle this generally. Note that userspace that allocates the memory is actually OK and does not need to be fixed, but userspace that gets it from guest or another process does need to be fixed. The later doesn't generally talk to the kernel so while it might be buggy it's not talking to the kernel in the buggy way - it's just using the header in the buggy way - so fixing header and asking userspace to recompile is the best we can do. I verified that the produced kernel binary on x86 is exactly identical before and after the change. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/virtio_ring.h | 46 ++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 9223c3a5c46a..476d3e5c0fe7 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -86,6 +86,13 @@ * at the end of the used ring. Guest should ignore the used->flags field. */ #define VIRTIO_RING_F_EVENT_IDX 29 +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -112,29 +119,46 @@ struct vring_used_elem { __virtio32 len; }; +typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_elem_t; + struct vring_used { __virtio16 flags; __virtio16 idx; - struct vring_used_elem ring[]; + vring_used_elem_t ring[]; }; +/* + * The ring element addresses are passed between components with different + * alignments assumptions. Thus, we might need to decrease the compiler-selected + * alignment, and so must use a typedef to make sure the aligned attribute + * actually takes hold: + * + * https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes + * + * When used on a struct, or struct member, the aligned attribute can only + * increase the alignment; in order to decrease it, the packed attribute must + * be specified as well. When used as part of a typedef, the aligned attribute + * can both increase and decrease alignment, and specifying the packed + * attribute generates a warning. + */ +typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE))) + vring_desc_t; +typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE))) + vring_avail_t; +typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_t; + struct vring { unsigned int num; - struct vring_desc *desc; + vring_desc_t *desc; - struct vring_avail *avail; + vring_avail_t *avail; - struct vring_used *used; + vring_used_t *used; }; -/* Alignment requirements for vring elements. - * When using pre-virtio 1.0 layout, these fall out naturally. - */ -#define VRING_AVAIL_ALIGN_SIZE 2 -#define VRING_USED_ALIGN_SIZE 4 -#define VRING_DESC_ALIGN_SIZE 16 - #ifndef VIRTIO_RING_NO_LEGACY /* The standard layout for the ring is a continuous chunk of memory which looks -- cgit v1.2.3 From 24c986748ba670c903a9d6a11ee96de2b3f5f1b8 Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Tue, 5 May 2020 14:27:41 +0200 Subject: vfio-ccw: Introduce a new schib region The schib region can be used by userspace to get the subchannel- information block (SCHIB) for the passthrough subchannel. This can be useful to get information such as channel path information via the SCHIB.PMCW fields. Signed-off-by: Farhan Ali Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Message-Id: <20200505122745.53208-5-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- include/uapi/linux/vfio.h | 1 + include/uapi/linux/vfio_ccw.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 015516bcfaa3..7a1abbd889bd 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -378,6 +378,7 @@ struct vfio_region_gfx_edid { /* sub-types for VFIO_REGION_TYPE_CCW */ #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) +#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h index cbecbf0cd54f..758bf214898d 100644 --- a/include/uapi/linux/vfio_ccw.h +++ b/include/uapi/linux/vfio_ccw.h @@ -34,4 +34,14 @@ struct ccw_cmd_region { __u32 ret_code; } __packed; +/* + * Used for processing commands that read the subchannel-information block + * Reading this region triggers a stsch() to hardware + * Note: this is controlled by a capability + */ +struct ccw_schib_region { +#define SCHIB_AREA_SIZE 52 + __u8 schib_area[SCHIB_AREA_SIZE]; +} __packed; + #endif -- cgit v1.2.3 From 836e66c218f355ec01ba57671c85abf32961dcea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Jun 2020 16:58:32 +0200 Subject: bpf: Fix up bpf_skb_adjust_room helper's skb csum setting Lorenz recently reported: In our TC classifier cls_redirect [0], we use the following sequence of helper calls to decapsulate a GUE (basically IP + UDP + custom header) encapsulated packet: bpf_skb_adjust_room(skb, -encap_len, BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO) bpf_redirect(skb->ifindex, BPF_F_INGRESS) It seems like some checksums of the inner headers are not validated in this case. For example, a TCP SYN packet with invalid TCP checksum is still accepted by the network stack and elicits a SYN ACK. [...] That is, we receive the following packet from the driver: | ETH | IP | UDP | GUE | IP | TCP | skb->ip_summed == CHECKSUM_UNNECESSARY ip_summed is CHECKSUM_UNNECESSARY because our NICs do rx checksum offloading. On this packet we run skb_adjust_room_mac(-encap_len), and get the following: | ETH | IP | TCP | skb->ip_summed == CHECKSUM_UNNECESSARY Note that ip_summed is still CHECKSUM_UNNECESSARY. After bpf_redirect()'ing into the ingress, we end up in tcp_v4_rcv(). There, skb_checksum_init() is turned into a no-op due to CHECKSUM_UNNECESSARY. The bpf_skb_adjust_room() helper is not aware of protocol specifics. Internally, it handles the CHECKSUM_COMPLETE case via skb_postpull_rcsum(), but that does not cover CHECKSUM_UNNECESSARY. In this case skb->csum_level of the original skb prior to bpf_skb_adjust_room() call was 0, that is, covering UDP. Right now there is no way to adjust the skb->csum_level. NICs that have checksum offload disabled (CHECKSUM_NONE) or that support CHECKSUM_COMPLETE are not affected. Use a safe default for CHECKSUM_UNNECESSARY by resetting to CHECKSUM_NONE and add a flag to the helper called BPF_F_ADJ_ROOM_NO_CSUM_RESET that allows users from opting out. Opting out is useful for the case where we don't remove/add full protocol headers, or for the case where a user wants to adjust the csum level manually e.g. through bpf_csum_level() helper that is added in subsequent patch. The bpf_skb_proto_{4_to_6,6_to_4}() for NAT64/46 translation from the BPF bpf_skb_change_proto() helper uses bpf_skb_net_hdr_{push,pop}() pair internally as well but doesn't change layers, only transitions between v4 to v6 and vice versa, therefore no adoption is required there. [0] https://lore.kernel.org/bpf/20200424185556.7358-1-lmb@cloudflare.com/ Fixes: 2be7e212d541 ("bpf: add bpf_skb_adjust_room helper") Reported-by: Lorenz Bauer Reported-by: Alan Maguire Signed-off-by: Daniel Borkmann Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/CACAyw9-uU_52esMd1JjuA80fRPHJv5vsSg8GnfW3t_qDU4aVKQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/11a90472e7cce83e76ddbfce81fdfce7bfc68808.1591108731.git.daniel@iogearbox.net --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b9ed9f14f2a2..3ba2bbbed80c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1635,6 +1635,13 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * + * By default, the helper will reset any offloaded checksum + * indicator of the skb to CHECKSUM_NONE. This can be avoided + * by the following flag: + * + * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded + * checksum data of the skb to CHECKSUM_NONE. + * * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer @@ -3433,6 +3440,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), + BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), }; enum { -- cgit v1.2.3 From 7cdec54f9713256bb170873a1fc5c75c9127c9d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Jun 2020 16:58:33 +0200 Subject: bpf: Add csum_level helper for fixing up csum levels Add a bpf_csum_level() helper which BPF programs can use in combination with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET flag to the latter to avoid falling back to CHECKSUM_NONE. The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels via BPF_CSUM_LEVEL_{INC,DEC} which calls __skb_{incr,decr}_checksum_unnecessary() on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the skb's csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the current level. Without this helper, there is no way to otherwise adjust the skb->csum_level. I did not add an extra dummy flags as there is plenty of free bitspace in level argument itself iff ever needed in future. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/279ae3717cb3d03c0ffeb511493c93c450a01e1a.1591108731.git.daniel@iogearbox.net --- include/uapi/linux/bpf.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3ba2bbbed80c..c65b374a5090 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3220,6 +3220,38 @@ union bpf_attr { * calculation. * Return * Requested value, or 0, if flags are not recognized. + * + * int bpf_csum_level(struct sk_buff *skb, u64 level) + * Description + * Change the skbs checksum level by one layer up or down, or + * reset it entirely to none in order to have the stack perform + * checksum validation. The level is applicable to the following + * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of + * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | + * through **bpf_skb_adjust_room**\ () helper with passing in + * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call + * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since + * the UDP header is removed. Similarly, an encap of the latter + * into the former could be accompanied by a helper call to + * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the + * skb is still intended to be processed in higher layers of the + * stack instead of just egressing at tc. + * + * There are three supported level settings at this time: + * + * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and + * sets CHECKSUM_NONE to force checksum validation by the stack. + * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current + * skb->csum_level. + * Return + * 0 on success, or a negative error in case of failure. In the + * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level + * is returned or the error code -EACCES in case the skb is not + * subject to CHECKSUM_UNNECESSARY. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3356,7 +3388,8 @@ union bpf_attr { FN(ringbuf_reserve), \ FN(ringbuf_submit), \ FN(ringbuf_discard), \ - FN(ringbuf_query), + FN(ringbuf_query), \ + FN(csum_level), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3433,6 +3466,14 @@ enum { BPF_F_CURRENT_NETNS = (-1L), }; +/* BPF_FUNC_csum_level level values. */ +enum { + BPF_CSUM_LEVEL_QUERY, + BPF_CSUM_LEVEL_INC, + BPF_CSUM_LEVEL_DEC, + BPF_CSUM_LEVEL_RESET, +}; + /* BPF_FUNC_skb_adjust_room flags. */ enum { BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), -- cgit v1.2.3 From 56f2e3b7d819f4fa44857ba81aa6870f18714ea0 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Jun 2020 10:17:28 +0100 Subject: capabilities: add description for CAP_SETFCAP Document the purpose of CAP_SETFCAP. For some reason this capability had no description while the others did. Signed-off-by: Stefan Hajnoczi Signed-off-by: James Morris --- include/uapi/linux/capability.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 240fdb9a60f6..877972fdfda3 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -331,6 +331,8 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_CONTROL 30 +/* Set or remove capabilities on files */ + #define CAP_SETFCAP 31 /* Override MAC access. -- cgit v1.2.3 From d8cac29b1d52204e6632d2887eff766acd02b9aa Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Tue, 5 May 2020 14:27:43 +0200 Subject: vfio-ccw: Introduce a new CRW region This region provides a mechanism to pass a Channel Report Word that affect vfio-ccw devices, and needs to be passed to the guest for its awareness and/or processing. The base driver (see crw_collect_info()) provides space for two CRWs, as a subchannel event may have two CRWs chained together (one for the ssid, one for the subchannel). As vfio-ccw will deal with everything at the subchannel level, provide space for a single CRW to be transferred in one shot. Signed-off-by: Farhan Ali Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Message-Id: <20200505122745.53208-7-farman@linux.ibm.com> [CH: added padding to ccw_crw_region] Signed-off-by: Cornelia Huck --- include/uapi/linux/vfio.h | 2 ++ include/uapi/linux/vfio_ccw.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 7a1abbd889bd..907758cf6d60 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -379,6 +379,7 @@ struct vfio_region_gfx_edid { /* sub-types for VFIO_REGION_TYPE_CCW */ #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) #define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) +#define VFIO_REGION_SUBTYPE_CCW_CRW (3) /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped @@ -578,6 +579,7 @@ enum { enum { VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_CRW_IRQ_INDEX, VFIO_CCW_NUM_IRQS }; diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h index 758bf214898d..aa04f3aa6db0 100644 --- a/include/uapi/linux/vfio_ccw.h +++ b/include/uapi/linux/vfio_ccw.h @@ -44,4 +44,13 @@ struct ccw_schib_region { __u8 schib_area[SCHIB_AREA_SIZE]; } __packed; +/* + * Used for returning a Channel Report Word to userspace. + * Note: this is controlled by a capability + */ +struct ccw_crw_region { + __u32 crw; + __u32 pad; +} __packed; + #endif -- cgit v1.2.3 From 10c5db286452b8c60e8f58e9a4c1cbc5a91e4e5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 May 2020 09:30:11 +0200 Subject: fs: move the fiemap definitions out of fs.h No need to pull the fiemap definitions into almost every file in the kernel build. Signed-off-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200523073016.2944131-5-hch@lst.de Signed-off-by: Theodore Ts'o --- include/uapi/linux/fiemap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 7a900b2377b6..24ca0c00cae3 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -9,8 +9,8 @@ * Andreas Dilger */ -#ifndef _LINUX_FIEMAP_H -#define _LINUX_FIEMAP_H +#ifndef _UAPI_LINUX_FIEMAP_H +#define _UAPI_LINUX_FIEMAP_H #include @@ -67,4 +67,4 @@ struct fiemap { #define FIEMAP_EXTENT_SHARED 0x00002000 /* Space shared with other * files. */ -#endif /* _LINUX_FIEMAP_H */ +#endif /* _UAPI_LINUX_FIEMAP_H */ -- cgit v1.2.3 From 5f1f79bbc9e26fa9412fa9522f957bb8f030c442 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 May 2020 16:01:25 +0200 Subject: virtio-mem: Paravirtualized memory hotplug Each virtio-mem device owns exactly one memory region. It is responsible for adding/removing memory from that memory region on request. When the device driver starts up, the requested amount of memory is queried and then plugged to Linux. On request, further memory can be plugged or unplugged. This patch only implements the plugging part. On x86-64, memory can currently be plugged in 4MB ("subblock") granularity. When required, a new memory block will be added (e.g., usually 128MB on x86-64) in order to plug more subblocks. Only x86-64 was tested for now. The online_page callback is used to keep unplugged subblocks offline when onlining memory - similar to the Hyper-V balloon driver. Unplugged pages are marked PG_offline, to tell dump tools (e.g., makedumpfile) to skip them. User space is usually responsible for onlining the added memory. The memory hotplug notifier is used to synchronize virtio-mem activity against memory onlining/offlining. Each virtio-mem device can belong to a NUMA node, which allows us to easily add/remove small chunks of memory to/from a specific NUMA node by using multiple virtio-mem devices. Something that works even when the guest has no idea about the NUMA topology. One way to view virtio-mem is as a "resizable DIMM" or a DIMM with many "sub-DIMMS". This patch directly introduces the basic infrastructure to implement memory unplug. Especially the memory block states and subblock bitmaps will be heavily used there. Notes: - In case memory is to be onlined by user space, we limit the amount of offline memory blocks, to not run out of memory. This is esp. an issue if memory is added faster than it is getting onlined. - Suspend/Hibernate is not supported due to the way virtio-mem devices behave. Limited support might be possible in the future. - Reloading the device driver is not supported. Reviewed-by: Pankaj Gupta Tested-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Oscar Salvador Cc: Michal Hocko Cc: Igor Mammedov Cc: Dave Young Cc: Andrew Morton Cc: Dan Williams Cc: Pavel Tatashin Cc: Stefan Hajnoczi Cc: Vlastimil Babka Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: linux-acpi@vger.kernel.org Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200507140139.17083-2-david@redhat.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_mem.h | 200 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 include/uapi/linux/virtio_mem.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index ecc27a17401a..b052355ac7a3 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -44,6 +44,7 @@ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h new file mode 100644 index 000000000000..1bfade78bdfd --- /dev/null +++ b/include/uapi/linux/virtio_mem.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Virtio Mem Device + * + * Copyright Red Hat, Inc. 2020 + * + * Authors: + * David Hildenbrand + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_MEM_H +#define _LINUX_VIRTIO_MEM_H + +#include +#include +#include +#include + +/* + * Each virtio-mem device manages a dedicated region in physical address + * space. Each device can belong to a single NUMA node, multiple devices + * for a single NUMA node are possible. A virtio-mem device is like a + * "resizable DIMM" consisting of small memory blocks that can be plugged + * or unplugged. The device driver is responsible for (un)plugging memory + * blocks on demand. + * + * Virtio-mem devices can only operate on their assigned memory region in + * order to (un)plug memory. A device cannot (un)plug memory belonging to + * other devices. + * + * The "region_size" corresponds to the maximum amount of memory that can + * be provided by a device. The "size" corresponds to the amount of memory + * that is currently plugged. "requested_size" corresponds to a request + * from the device to the device driver to (un)plug blocks. The + * device driver should try to (un)plug blocks in order to reach the + * "requested_size". It is impossible to plug more memory than requested. + * + * The "usable_region_size" represents the memory region that can actually + * be used to (un)plug memory. It is always at least as big as the + * "requested_size" and will grow dynamically. It will only shrink when + * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG). + * + * There are no guarantees what will happen if unplugged memory is + * read/written. Such memory should, in general, not be touched. E.g., + * even writing might succeed, but the values will simply be discarded at + * random points in time. + * + * It can happen that the device cannot process a request, because it is + * busy. The device driver has to retry later. + * + * Usually, during system resets all memory will get unplugged, so the + * device driver can start with a clean state. However, in specific + * scenarios (if the device is busy) it can happen that the device still + * has memory plugged. The device driver can request to unplug all memory + * (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the + * device is busy. + */ + +/* --- virtio-mem: guest -> host requests --- */ + +/* request to plug memory blocks */ +#define VIRTIO_MEM_REQ_PLUG 0 +/* request to unplug memory blocks */ +#define VIRTIO_MEM_REQ_UNPLUG 1 +/* request to unplug all blocks and shrink the usable size */ +#define VIRTIO_MEM_REQ_UNPLUG_ALL 2 +/* request information about the plugged state of memory blocks */ +#define VIRTIO_MEM_REQ_STATE 3 + +struct virtio_mem_req_plug { + __virtio64 addr; + __virtio16 nb_blocks; +}; + +struct virtio_mem_req_unplug { + __virtio64 addr; + __virtio16 nb_blocks; +}; + +struct virtio_mem_req_state { + __virtio64 addr; + __virtio16 nb_blocks; +}; + +struct virtio_mem_req { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_req_plug plug; + struct virtio_mem_req_unplug unplug; + struct virtio_mem_req_state state; + } u; +}; + + +/* --- virtio-mem: host -> guest response --- */ + +/* + * Request processed successfully, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ACK 0 +/* + * Request denied - e.g. trying to plug more than requested, applicable for + * - VIRTIO_MEM_REQ_PLUG + */ +#define VIRTIO_MEM_RESP_NACK 1 +/* + * Request cannot be processed right now, try again later, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + */ +#define VIRTIO_MEM_RESP_BUSY 2 +/* + * Error in request (e.g. addresses/alignment), applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ERROR 3 + + +/* State of memory blocks is "plugged" */ +#define VIRTIO_MEM_STATE_PLUGGED 0 +/* State of memory blocks is "unplugged" */ +#define VIRTIO_MEM_STATE_UNPLUGGED 1 +/* State of memory blocks is "mixed" */ +#define VIRTIO_MEM_STATE_MIXED 2 + +struct virtio_mem_resp_state { + __virtio16 state; +}; + +struct virtio_mem_resp { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_resp_state state; + } u; +}; + +/* --- virtio-mem: configuration --- */ + +struct virtio_mem_config { + /* Block size and alignment. Cannot change. */ + __u32 block_size; + __u32 padding; + /* Start address of the memory region. Cannot change. */ + __u64 addr; + /* Region size (maximum). Cannot change. */ + __u64 region_size; + /* + * Currently usable region size. Can grow up to region_size. Can + * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config + * update will be sent). + */ + __u64 usable_region_size; + /* + * Currently used size. Changes due to plug/unplug requests, but no + * config updates will be sent. + */ + __u64 plugged_size; + /* Requested size. New plug requests cannot exceed it. Can change. */ + __u64 requested_size; +}; + +#endif /* _LINUX_VIRTIO_MEM_H */ -- cgit v1.2.3 From f2af6d3978d74a7891d0f428537b4494498202cb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 May 2020 16:01:27 +0200 Subject: virtio-mem: Allow to specify an ACPI PXM as nid We want to allow to specify (similar as for a DIMM), to which node a virtio-mem device (and, therefore, its memory) belongs. Add a new virtio-mem feature flag and export pxm_to_node, so it can be used in kernel module context. Acked-by: Michal Hocko # for the export Acked-by: "Rafael J. Wysocki" # for the export Acked-by: Pankaj Gupta Tested-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Oscar Salvador Cc: Michal Hocko Cc: Igor Mammedov Cc: Dave Young Cc: Andrew Morton Cc: Dan Williams Cc: Pavel Tatashin Cc: Stefan Hajnoczi Cc: Vlastimil Babka Cc: Len Brown Cc: linux-acpi@vger.kernel.org Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200507140139.17083-4-david@redhat.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_mem.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index 1bfade78bdfd..e0a9dc7397c3 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -83,6 +83,12 @@ * device is busy. */ +/* --- virtio-mem: feature bits --- */ + +/* node_id is an ACPI PXM and is valid */ +#define VIRTIO_MEM_F_ACPI_PXM 0 + + /* --- virtio-mem: guest -> host requests --- */ /* request to plug memory blocks */ @@ -177,7 +183,9 @@ struct virtio_mem_resp { struct virtio_mem_config { /* Block size and alignment. Cannot change. */ __u32 block_size; - __u32 padding; + /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ + __u16 node_id; + __u16 padding; /* Start address of the memory region. Cannot change. */ __u64 addr; /* Region size (maximum). Cannot change. */ -- cgit v1.2.3 From fce8afd76e3a4d8c59c92f84f8027569fd7031d0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 May 2020 12:14:02 +0200 Subject: virtio-mem: Don't rely on implicit compiler padding for requests The compiler will add padding after the last member, make that explicit. The size of a request is always 24 bytes. The size of a response always 10 bytes. Add compile-time checks. Cc: "Michael S. Tsirkin" Cc: Pankaj Gupta Cc: teawater Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200515101402.16597-1-david@redhat.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_mem.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index e0a9dc7397c3..a455c488a995 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -103,16 +103,19 @@ struct virtio_mem_req_plug { __virtio64 addr; __virtio16 nb_blocks; + __virtio16 padding[3]; }; struct virtio_mem_req_unplug { __virtio64 addr; __virtio16 nb_blocks; + __virtio16 padding[3]; }; struct virtio_mem_req_state { __virtio64 addr; __virtio16 nb_blocks; + __virtio16 padding[3]; }; struct virtio_mem_req { -- cgit v1.2.3 From f286d627ef026a4d04b41ae5917d58ddf243c3c5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 13 Jan 2020 21:21:49 +0100 Subject: gfs2: Keep track of deleted inode generations in LVBs When deleting an inode, keep track of the generation of the deleted inode in the inode glock Lock Value Block (LVB). When trying to delete an inode remotely, check the last-known inode generation against the deleted inode generation to skip duplicate remote deletes. This avoids taking the resource group glock in order to verify the block type. Signed-off-by: Andreas Gruenbacher --- include/uapi/linux/gfs2_ondisk.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index 2dc10a034de1..07e508e6691b 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -171,6 +171,12 @@ struct gfs2_rindex { #define GFS2_RGF_NOALLOC 0x00000008 #define GFS2_RGF_TRIMMED 0x00000010 +struct gfs2_inode_lvb { + __be32 ri_magic; + __be32 __pad; + __be64 ri_generation_deleted; +}; + struct gfs2_rgrp_lvb { __be32 rl_magic; __be32 rl_flags; -- cgit v1.2.3 From 776f395004d829bbbf18c159ed9beb517a208c71 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 5 Jun 2020 18:27:13 +0800 Subject: vhost_vdpa: Support config interrupt in vdpa This commit implements config interrupt support in vhost_vdpa layer. Signed-off-by: Zhu Lingshan Acked-by: Jason Wang Link: https://lore.kernel.org/r/1591352835-22441-4-git-send-email-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 9fe72e4b1373..0c2349612e77 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -15,6 +15,8 @@ #include #include +#define VHOST_FILE_UNBIND -1 + /* ioctls */ #define VHOST_VIRTIO 0xAF @@ -140,4 +142,6 @@ /* Get the max ring size. */ #define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) +/* Set event fd for config interrupt*/ +#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) #endif -- cgit v1.2.3 From 97eda66421c44f1449e8d087fd05eab5d466afb7 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Fri, 5 Jun 2020 17:41:11 +0200 Subject: include: fix wiki website url in netlink interface header The wiki url is still the old "wireless.kernel.org" instead of the new "wireless.wiki.kernel.org" Signed-off-by: Flavio Suligoi Link: https://lore.kernel.org/r/20200605154112.16277-9-f.suligoi@asem.it Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index dad8c8f8581f..4e6339ab1fce 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -794,7 +794,7 @@ * various triggers. These triggers can be configured through this * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For * more background information, see - * http://wireless.kernel.org/en/users/Documentation/WoWLAN. + * https://wireless.wiki.kernel.org/en/users/Documentation/WoWLAN. * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification * from the driver reporting the wakeup reason. In this case, the * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason -- cgit v1.2.3 From 544fc7dbbf920a3e64d109c416ee229e8e1763c5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 8 Jun 2020 02:03:15 -0400 Subject: virtio_mem: convert device block size into 64bit If subblock size is large (e.g. 1G) 32 bit math involving it can overflow. Rather than try to catch all instances of that, let's tweak block size to 64 bit. It ripples through UAPI which is an ABI change, but it's not too late to make it, and it will allow supporting >4Gbyte blocks while might become necessary down the road. Fixes: 5f1f79bbc9e26 ("virtio-mem: Paravirtualized memory hotplug") Signed-off-by: Michael S. Tsirkin Acked-by: David Hildenbrand --- include/uapi/linux/virtio_mem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index a455c488a995..a9ffe041843c 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -185,10 +185,10 @@ struct virtio_mem_resp { struct virtio_mem_config { /* Block size and alignment. Cannot change. */ - __u32 block_size; + __u64 block_size; /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ __u16 node_id; - __u16 padding; + __u8 padding[6]; /* Start address of the memory region. Cannot change. */ __u64 addr; /* Region size (maximum). Cannot change. */ -- cgit v1.2.3 From 281920b7e0b31e0a7706433ff58e7d52ac97c327 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Jun 2020 15:31:46 +0200 Subject: bpf: Devmap adjust uapi for attach bpf program V2: - Defer changing BPF-syscall to start at file-descriptor 1 - Use {} to zero initialise struct. The recent commit fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry"), introduced ability to attach (and run) a separate XDP bpf_prog for each devmap entry. A bpf_prog is added via a file-descriptor. As zero were a valid FD, not using the feature requires using value minus-1. The UAPI is extended via tail-extending struct bpf_devmap_val and using map->value_size to determine the feature set. This will break older userspace applications not using the bpf_prog feature. Consider an old userspace app that is compiled against newer kernel uapi/bpf.h, it will not know that it need to initialise the member bpf_prog.fd to minus-1. Thus, users will be forced to update source code to get program running on newer kernels. This patch remove the minus-1 checks, and have zero mean feature isn't used. Followup patches either for kernel or libbpf should handle and avoid returning file-descriptor zero in the first place. Fixes: fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/159170950687.2102545.7235914718298050113.stgit@firesoul --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c65b374a5090..19684813faae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3761,6 +3761,19 @@ struct xdp_md { __u32 egress_ifindex; /* txq->dev->ifindex */ }; +/* DEVMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_devmap_val { + __u32 ifindex; /* device index */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; +}; + enum sk_action { SK_DROP = 0, SK_PASS, -- cgit v1.2.3 From 7bb64402a092136fb2fda995b0e0304b43c163c5 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Thu, 11 Jun 2020 20:56:52 +0800 Subject: spi: tools: Add macro definitions to fix build errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SPI_TX_OCTAL and SPI_RX_OCTAL to fix the following build errors: CC spidev_test.o spidev_test.c: In function ‘transfer’: spidev_test.c:131:13: error: ‘SPI_TX_OCTAL’ undeclared (first use in this function) if (mode & SPI_TX_OCTAL) ^ spidev_test.c:131:13: note: each undeclared identifier is reported only once for each function it appears in spidev_test.c:137:13: error: ‘SPI_RX_OCTAL’ undeclared (first use in this function) if (mode & SPI_RX_OCTAL) ^ spidev_test.c: In function ‘parse_opts’: spidev_test.c:290:12: error: ‘SPI_TX_OCTAL’ undeclared (first use in this function) mode |= SPI_TX_OCTAL; ^ spidev_test.c:308:12: error: ‘SPI_RX_OCTAL’ undeclared (first use in this function) mode |= SPI_RX_OCTAL; ^ LD spidev_test-in.o ld: cannot find spidev_test.o: No such file or directory Additionally, maybe SPI_CS_WORD and SPI_3WIRE_HIZ will be used in the future, so add them too. Fixes: 896fa735084e ("spi: spidev_test: Add support for Octal mode data transfers") Signed-off-by: Qing Zhang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1591880212-13479-2-git-send-email-zhangqing@loongson.cn Signed-off-by: Mark Brown --- include/uapi/linux/spi/spidev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index ee0f2460bff6..9390615d52f0 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -48,6 +48,10 @@ #define SPI_TX_QUAD 0x200 #define SPI_RX_DUAL 0x400 #define SPI_RX_QUAD 0x800 +#define SPI_CS_WORD 0x1000 +#define SPI_TX_OCTAL 0x2000 +#define SPI_RX_OCTAL 0x4000 +#define SPI_3WIRE_HIZ 0x8000 /*---------------------------------------------------------------------------*/ -- cgit v1.2.3 From 88ee9d571b6d8ed345f877e05f685814412e359b Mon Sep 17 00:00:00 2001 From: "Jan (janneke) Nieuwenhuizen" Date: Mon, 25 May 2020 21:39:40 +0200 Subject: ext4: support xattr gnu.* namespace for the Hurd The Hurd gained[0] support for moving the translator and author fields out of the inode and into the "gnu.*" xattr namespace. In anticipation of that, an xattr INDEX was reserved[1]. The Hurd has now been brought into compliance[2] with that. This patch adds support for reading and writing such attributes from Linux; you can now do something like mkdir -p hurd-root/servers/socket touch hurd-root/servers/socket/1 setfattr --name=gnu.translator --value='"/hurd/pflocal\0"' \ hurd-root/servers/socket/1 getfattr --name=gnu.translator hurd-root/servers/socket/1 # file: 1 gnu.translator="/hurd/pflocal" to setup a pipe translator, which is being used to create[3] a vm-image for the Hurd from GNU Guix. [0] https://summerofcode.withgoogle.com/projects/#5869799859027968 [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3980bd3b406addb327d858aebd19e229ea340b9a [2] https://git.savannah.gnu.org/cgit/hurd/hurd.git/commit/?id=a04c7bf83172faa7cb080fbe3b6c04a8415ca645 [3] https://git.savannah.gnu.org/cgit/guix.git/log/?h=wip-hurd-vm Signed-off-by: Jan Nieuwenhuizen Link: https://lore.kernel.org/r/20200525193940.878-1-janneke@gnu.org Signed-off-by: Theodore Ts'o --- include/uapi/linux/xattr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index c1395b5bd432..9463db2dfa9d 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -7,6 +7,7 @@ Copyright (C) 2001 by Andreas Gruenbacher Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris + Copyright (c) 2020 Jan (janneke) Nieuwenhuizen */ #include @@ -31,6 +32,9 @@ #define XATTR_BTRFS_PREFIX "btrfs." #define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1) +#define XATTR_HURD_PREFIX "gnu." +#define XATTR_HURD_PREFIX_LEN (sizeof(XATTR_HURD_PREFIX) - 1) + #define XATTR_SECURITY_PREFIX "security." #define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) -- cgit v1.2.3 From e17d43b93e544f5016c0251d2074c15568d5d963 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 May 2020 15:19:08 +0300 Subject: perf: Add perf text poke event Record (single instruction) changes to the kernel text (i.e. self-modifying code) in order to support tracers like Intel PT and ARM CoreSight. A copy of the running kernel code is needed as a reference point (e.g. from /proc/kcore). The text poke event records the old bytes and the new bytes so that the event can be processed forwards or backwards. The basic problem is recording the modified instruction in an unambiguous manner given SMP instruction cache (in)coherence. That is, when modifying an instruction concurrently any solution with one or multiple timestamps is not sufficient: CPU0 CPU1 0 1 write insn A 2 execute insn A 3 sync-I$ 4 Due to I$, CPU1 might execute either the old or new A. No matter where we record tracepoints on CPU0, one simply cannot tell what CPU1 will have observed, except that at 0 it must be the old one and at 4 it must be the new one. To solve this, take inspiration from x86 text poking, which has to solve this exact problem due to variable length instruction encoding and I-fetch windows. 1) overwrite the instruction with a breakpoint and sync I$ This guarantees that that code flow will never hit the target instruction anymore, on any CPU (or rather, it will cause an exception). 2) issue the TEXT_POKE event 3) overwrite the breakpoint with the new instruction and sync I$ Now we know that any execution after the TEXT_POKE event will either observe the breakpoint (and hit the exception) or the new instruction. So by guarding the TEXT_POKE event with an exception on either side; we can now tell, without doubt, which instruction another CPU will have observed. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200512121922.8997-2-adrian.hunter@intel.com --- include/uapi/linux/perf_event.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7b2d6fc9e6ed..e5bee6c17b86 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -383,7 +383,8 @@ struct perf_event_attr { bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ - __reserved_1 : 31; + text_poke : 1, /* include text poke events */ + __reserved_1 : 30; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1024,6 +1025,24 @@ enum perf_event_type { */ PERF_RECORD_CGROUP = 19, + /* + * Records changes to kernel text i.e. self-modified code. 'old_len' is + * the number of old bytes, 'new_len' is the number of new bytes. Either + * 'old_len' or 'new_len' may be zero to indicate, for example, the + * addition or removal of a trampoline. 'bytes' contains the old bytes + * followed immediately by the new bytes. + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u16 old_len; + * u16 new_len; + * u8 bytes[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_TEXT_POKE = 20, + PERF_RECORD_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 69e49088692899d25dedfa22f00dfb9761e86ed7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 May 2020 15:19:11 +0300 Subject: kprobes: Add perf ksymbol events for kprobe insn pages Symbols are needed for tools to describe instruction addresses. Pages allocated for kprobe's purposes need symbols to be created for them. Add such symbols to be visible via perf ksymbol events. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200512121922.8997-5-adrian.hunter@intel.com --- include/uapi/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e5bee6c17b86..e1a4179144a1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1049,6 +1049,11 @@ enum perf_event_type { enum perf_record_ksymbol_type { PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + /* + * Out of line code such as kprobe-replaced instructions or optimized + * kprobes. + */ + PERF_RECORD_KSYMBOL_TYPE_OOL = 2, PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ }; -- cgit v1.2.3 From dd9ddf466ad7a5d2e247925d81ebb0b878bf3b76 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 May 2020 15:19:14 +0300 Subject: ftrace: Add perf ksymbol events for ftrace trampolines Symbols are needed for tools to describe instruction addresses. Pages allocated for ftrace's purposes need symbols to be created for them. Add such symbols to be visible via perf ksymbol events. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200512121922.8997-8-adrian.hunter@intel.com --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e1a4179144a1..52ca2093831c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1051,7 +1051,7 @@ enum perf_record_ksymbol_type { PERF_RECORD_KSYMBOL_TYPE_BPF = 1, /* * Out of line code such as kprobe-replaced instructions or optimized - * kprobes. + * kprobes or ftrace trampolines. */ PERF_RECORD_KSYMBOL_TYPE_OOL = 2, PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ -- cgit v1.2.3 From 27784a256c2a453d891c0aaff84c3ac3f2e8a5a0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 13 Jun 2020 09:37:54 +0200 Subject: spi: uapi: spidev: Use TABs for alignment The UAPI uses TABs for alignment. Convert the recently introduced spaces to TABs to restore consistency. Fixes: 7bb64402a092136 ("spi: tools: Add macro definitions to fix build errors") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200613073755.15906-1-geert+renesas@glider.be Signed-off-by: Mark Brown --- include/uapi/linux/spi/spidev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index 9390615d52f0..d56427c0b3e0 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -48,10 +48,10 @@ #define SPI_TX_QUAD 0x200 #define SPI_RX_DUAL 0x400 #define SPI_RX_QUAD 0x800 -#define SPI_CS_WORD 0x1000 -#define SPI_TX_OCTAL 0x2000 -#define SPI_RX_OCTAL 0x4000 -#define SPI_3WIRE_HIZ 0x8000 +#define SPI_CS_WORD 0x1000 +#define SPI_TX_OCTAL 0x2000 +#define SPI_RX_OCTAL 0x4000 +#define SPI_3WIRE_HIZ 0x8000 /*---------------------------------------------------------------------------*/ -- cgit v1.2.3 From b0659d8a950d424e57cc0a67afc4740ee561224e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Jun 2020 14:49:26 -0700 Subject: bpf: Fix definition of bpf_ringbuf_output() helper in UAPI comments Fix definition of bpf_ringbuf_output() in UAPI header comments, which is used to generate libbpf's bpf_helper_defs.h header. Return value is a number (error code), not a pointer. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200615214926.3638836-1-andriin@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 19684813faae..974a71342aea 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3168,7 +3168,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of -- cgit v1.2.3 From f517f7925b7b453cb83be06c268ba057b78e4792 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:06 +0530 Subject: ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a 'struct nd_pkg_pdsm' and a maximal union named 'nd_pdsm_payload'. These new structs together with 'struct nd_cmd_pkg' for a pdsm envelop thats sent by libndctl to libnvdimm and serviced by papr_scm in 'papr_scm_service_pdsm()'. The PDSM request is communicated by member 'struct nd_cmd_pkg.nd_command' together with other information on the pdsm payload (size-in, size-out). The patch also introduces 'struct pdsm_cmd_desc' instances of which are stored in an array __pdsm_cmd_descriptors[] indexed with PDSM cmd and corresponding access function pdsm_cmd_desc() is introduced. 'struct pdsm_cdm_desc' holds the service function for a given PDSM and corresponding payload in/out sizes. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL command from libnvdimm. The function performs validation on the PDSM payload based on info present in corresponding PDSM descriptor and if valid calls the 'struct pdcm_cmd_desc.service' function to service the PDSM. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-6-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index de5d90212409..0e09dc5cec19 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -244,6 +244,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 +#define NVDIMM_FAMILY_PAPR 5 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From 60997c3d45d9a67daf01c56d805ae4fec37e0bd8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 3 Jun 2020 21:48:55 +0200 Subject: close_range: add CLOSE_RANGE_UNSHARE One of the use-cases of close_range() is to drop file descriptors just before execve(). This would usually be expressed in the sequence: unshare(CLONE_FILES); close_range(3, ~0U); as pointed out by Linus it might be desirable to have this be a part of close_range() itself under a new flag CLOSE_RANGE_UNSHARE. This expands {dup,unshare)_fd() to take a max_fds argument that indicates the maximum number of file descriptors to copy from the old struct files. When the user requests that all file descriptors are supposed to be closed via close_range(min, max) then we can cap via unshare_fd(min) and hence don't need to do any of the heavy fput() work for everything above min. The patch makes it so that if CLOSE_RANGE_UNSHARE is requested and we do in fact currently share our file descriptor table we create a new private copy. We then close all fds in the requested range and finally after we're done we install the new fd table. Suggested-by: Linus Torvalds Signed-off-by: Christian Brauner --- include/uapi/linux/close_range.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/uapi/linux/close_range.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/close_range.h b/include/uapi/linux/close_range.h new file mode 100644 index 000000000000..6928a9fdee3c --- /dev/null +++ b/include/uapi/linux/close_range.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_CLOSE_RANGE_H +#define _UAPI_LINUX_CLOSE_RANGE_H + +/* Unshare the file descriptor table before closing file descriptors. */ +#define CLOSE_RANGE_UNSHARE (1U << 1) + +#endif /* _UAPI_LINUX_CLOSE_RANGE_H */ + -- cgit v1.2.3 From 03cc8353c2244c8b790c3c81a0f1532d34a9d738 Mon Sep 17 00:00:00 2001 From: Rob Gill Date: Mon, 1 Jun 2020 21:17:56 +0000 Subject: USB: core: additional Device Classes to debug/usb/devices Several newer USB Device classes are not presently reported individually at /sys/kernel/debug/usb/devices, (reported as "unk."). This patch adds the following classes: 0fh (Personal Healthcare devices), 10h (USB Type-C combined Audio/Video devices) 11h (USB billboard), 12h (USB Type-C Bridge). As defined at [https://www.usb.org/defined-class-codes] Corresponding classes defined in include/linux/usb/ch9.h. Signed-off-by: Rob Gill Link: https://lore.kernel.org/r/20200601211749.6878-1-rrobgill@protonmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/ch9.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 2b623f36af6b..456ab0c2b586 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -326,6 +326,10 @@ struct usb_device_descriptor { #define USB_CLASS_CONTENT_SEC 0x0d /* content security */ #define USB_CLASS_VIDEO 0x0e #define USB_CLASS_WIRELESS_CONTROLLER 0xe0 +#define USB_CLASS_PERSONAL_HEALTHCARE 0x0f +#define USB_CLASS_AUDIO_VIDEO 0x10 +#define USB_CLASS_BILLBOARD 0x11 +#define USB_CLASS_USB_TYPE_C_BRIDGE 0x12 #define USB_CLASS_MISC 0xef #define USB_CLASS_APP_SPEC 0xfe #define USB_CLASS_VENDOR_SPEC 0xff -- cgit v1.2.3 From 81c7462883b0cc0a4eeef0687f80ad5b5baee5f6 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Thu, 18 Jun 2020 17:13:38 +0800 Subject: USB: replace hardcode maximum usb string length by definition Replace hardcoded maximum USB string length (126 bytes) by definition "USB_MAX_STRING_LEN". Signed-off-by: Macpaul Lin Acked-by: Alan Stern Link: https://lore.kernel.org/r/1592471618-29428-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/ch9.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 456ab0c2b586..b1ed2ccfe9cf 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -368,6 +368,9 @@ struct usb_config_descriptor { /*-------------------------------------------------------------------------*/ +/* USB String descriptors can contain at most 126 characters. */ +#define USB_MAX_STRING_LEN 126 + /* USB_DT_STRING: String descriptor */ struct usb_string_descriptor { __u8 bLength; -- cgit v1.2.3 From f751820bc319a30d31a80fe511d88642aaefcdee Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 18 Jun 2020 13:07:13 -0600 Subject: vfio/type1: Fix migration info capability ID ID 1 is already used by the IOVA range capability, use ID 2. Reported-by: Liu Yi L Fixes: ad721705d09c ("vfio iommu: Add migration capability to report supported features") Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index eca6692667a3..920470502329 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1030,7 +1030,7 @@ struct vfio_iommu_type1_info_cap_iova_range { * size in bytes that can be used by user applications when getting the dirty * bitmap. */ -#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 struct vfio_iommu_type1_info_cap_migration { struct vfio_info_cap_header header; -- cgit v1.2.3 From 5769a351b89cd4d82016f18fa5f6c4077403564d Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Wed, 17 Jun 2020 17:53:55 +0800 Subject: io_uring: change the poll type to be 32-bits poll events should be 32-bits to cover EPOLLEXCLUSIVE. Explicit word-swap the poll32_events for big endian to make sure the ABI is not changed. We call this feature IORING_FEAT_POLL_32BITS, applications who want to use EPOLLEXCLUSIVE should check the feature bit first. Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92c22699a5a7..8d033961cb78 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -31,7 +31,8 @@ struct io_uring_sqe { union { __kernel_rwf_t rw_flags; __u32 fsync_flags; - __u16 poll_events; + __u16 poll_events; /* compatibility */ + __u32 poll32_events; /* word-reversed for BE */ __u32 sync_range_flags; __u32 msg_flags; __u32 timeout_flags; @@ -248,6 +249,7 @@ struct io_uring_params { #define IORING_FEAT_RW_CUR_POS (1U << 3) #define IORING_FEAT_CUR_PERSONALITY (1U << 4) #define IORING_FEAT_FAST_POLL (1U << 5) +#define IORING_FEAT_POLL_32BITS (1U << 6) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 2a916ecc405686c1d86f632281bc06aa75ebae4e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:48 +0000 Subject: net/devlink: Support querying hardware address of port function PCI PF and VF devlink port can manage the function represented by a devlink port. Enable users to query port function's hardware address. Example of a PCI VF port which supports a port function: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:11:22:33:44:66 $ devlink port show pci/0000:06:00.0/2 -jp { "port": { "pci/0000:06:00.0/2": { "type": "eth", "netdev": "enp6s0pf0vf1", "flavour": "pcivf", "pfnum": 0, "vfnum": 1, "function": { "hw_addr": "00:11:22:33:44:66" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 08563e6a424d..07d0af8f5923 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -451,6 +451,8 @@ enum devlink_attr { DEVLINK_ATTR_TRAP_POLICER_RATE, /* u64 */ DEVLINK_ATTR_TRAP_POLICER_BURST, /* u64 */ + DEVLINK_ATTR_PORT_FUNCTION, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, @@ -497,4 +499,12 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_function_attr { + DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ + + __DEVLINK_PORT_FUNCTION_ATTR_MAX, + DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From b5872cd0e823e4cb50b3a75cd9522167eeb676a2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 20 Jun 2020 22:01:56 +0530 Subject: devlink: Add support for board.serial_number to info_get cb. Board serial number is a serial number, often available in PCI *Vital Product Data*. Also, update devlink-info.rst documentation file. Cc: Jiri Pirko Cc: Jakub Kicinski Signed-off-by: Vasundhara Volam Reviewed-by: Michael Chan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 07d0af8f5923..87c83a82991b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -453,6 +453,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_FUNCTION, /* nested */ + DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 23a60f834406c8e3805328b630d09d5546b460c1 Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Mon, 22 Jun 2020 11:46:36 -0400 Subject: s390/kvm: diagnose 0x318 sync and reset DIAGNOSE 0x318 (diag318) sets information regarding the environment the VM is running in (Linux, z/VM, etc) and is observed via firmware/service events. This is a privileged s390x instruction that must be intercepted by SIE. Userspace handles the instruction as well as migration. Data is communicated via VCPU register synchronization. The Control Program Name Code (CPNC) is stored in the SIE block. The CPNC along with the Control Program Version Code (CPVC) are stored in the kvm_vcpu_arch struct. This data is reset on load normal and clear resets. Signed-off-by: Collin Walling Reviewed-by: Janosch Frank Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20200622154636.5499-3-walling@linux.ibm.com [borntraeger@de.ibm.com: fix sync_reg position] Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4fdf30316582..35cdb4307904 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1031,6 +1031,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_S390_DIAG318 184 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ac53503ee38a1ffbc47c7cca6cbfc48ba9c65c5e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 14 May 2020 18:01:43 +0200 Subject: media: videobuf2: add V4L2_FLAG_MEMORY_NON_CONSISTENT flag By setting or clearing V4L2_FLAG_MEMORY_NON_CONSISTENT flag user-space should be able to set or clear queue's NON_CONSISTENT ->dma_attrs. Queue's ->dma_attrs are passed to the underlying allocator in __vb2_buf_mem_alloc(), so thus user-space is able to request vb2 buffer's memory to be either consistent (coherent) or non-consistent. The patch set also adds a corresponding capability flag: fill_buf_caps() reports V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS when queue supports user-space cache management hints. Note, however, that MMAP_CACHE_HINTS capability only valid when the queue is used for memory MMAP-ed streaming I/O. Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c3a1cf1c507f..34ba1017b89b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -189,6 +189,8 @@ enum v4l2_memory { V4L2_MEMORY_DMABUF = 4, }; +#define V4L2_FLAG_MEMORY_NON_CONSISTENT (1 << 0) + /* see also http://vektor.theorem.ca/graphics/ycbcr/ */ enum v4l2_colorspace { /* @@ -954,6 +956,7 @@ struct v4l2_requestbuffers { #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3) #define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4) #define V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF (1 << 5) +#define V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS (1 << 6) /** * struct v4l2_plane - plane info for multi-planar buffers -- cgit v1.2.3 From 1e0b2318fa75d186ee0d2be31843ce867385fcc4 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 14 May 2020 18:01:45 +0200 Subject: media: videobuf2: handle V4L2_FLAG_MEMORY_NON_CONSISTENT flag This patch lets user-space to request a non-consistent memory allocation during CREATE_BUFS and REQBUFS ioctl calls. = CREATE_BUFS struct v4l2_create_buffers has seven 4-byte reserved areas, so reserved[0] is renamed to ->flags. The struct, thus, now has six reserved 4-byte regions. = CREATE_BUFS32 struct v4l2_create_buffers32 has seven 4-byte reserved areas, so reserved[0] is renamed to ->flags. The struct, thus, now has six reserved 4-byte regions. = REQBUFS We use one bit of a ->reserved[1] member of struct v4l2_requestbuffers, which is now renamed to ->flags. Unlike v4l2_create_buffers, struct v4l2_requestbuffers does not have enough reserved room. Therefore for backward compatibility ->reserved and ->flags were put into anonymous union. Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 34ba1017b89b..fec2607a07e3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -946,7 +946,10 @@ struct v4l2_requestbuffers { __u32 type; /* enum v4l2_buf_type */ __u32 memory; /* enum v4l2_memory */ __u32 capabilities; - __u32 reserved[1]; + union { + __u32 flags; + __u32 reserved[1]; + }; }; /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */ @@ -2450,6 +2453,9 @@ struct v4l2_dbg_chip_info { * @memory: enum v4l2_memory; buffer memory type * @format: frame format, for which buffers are requested * @capabilities: capabilities of this buffer type. + * @flags: additional buffer management attributes (ignored unless the + * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability + * and configured for MMAP streaming I/O). * @reserved: future extensions */ struct v4l2_create_buffers { @@ -2458,7 +2464,8 @@ struct v4l2_create_buffers { __u32 memory; struct v4l2_format format; __u32 capabilities; - __u32 reserved[7]; + __u32 flags; + __u32 reserved[6]; }; /* -- cgit v1.2.3 From 286cf7d3a99e1ca8c1d8e674b9a98f2dbe8520dc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 26 May 2020 10:59:53 +0200 Subject: media: videodev2.h: add V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL flag Add the V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL flag to signal that the coded frame interval can be set separately from the raw frame interval for stateful encoders. Signed-off-by: Hans Verkuil Reviewed-by: Michael Tretter Acked-by: Tomasz Figa Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index fec2607a07e3..303805438814 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -794,6 +794,7 @@ struct v4l2_fmtdesc { #define V4L2_FMT_FLAG_EMULATED 0x0002 #define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM 0x0004 #define V4L2_FMT_FLAG_DYN_RESOLUTION 0x0008 +#define V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL 0x0010 /* Frame Size and frame rate enumeration */ /* -- cgit v1.2.3 From 79a28ddd18e9c653f13f60dfabee15c024e64b9b Mon Sep 17 00:00:00 2001 From: Alexandre Cassen Date: Tue, 23 Jun 2020 10:33:45 +0200 Subject: rtnetlink: add keepalived rtm_protocol Keepalived can set global static ip routes or virtual ip routes dynamically following VRRP protocol states. Using a dedicated rtm_protocol will help keeping track of it. Changes in v2: - fix tab/space indenting Signed-off-by: Alexandre Cassen Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 45 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 073e71ef6bdd..879e64950a0a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -257,12 +257,12 @@ enum { /* rtm_protocol */ -#define RTPROT_UNSPEC 0 -#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; - not used by current IPv4 */ -#define RTPROT_KERNEL 2 /* Route installed by kernel */ -#define RTPROT_BOOT 3 /* Route installed during boot */ -#define RTPROT_STATIC 4 /* Route installed by administrator */ +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ /* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; they are just passed from user and back as is. @@ -271,22 +271,23 @@ enum { avoid conflicts. */ -#define RTPROT_GATED 8 /* Apparently, GateD */ -#define RTPROT_RA 9 /* RDISC/ND router advertisements */ -#define RTPROT_MRT 10 /* Merit MRT */ -#define RTPROT_ZEBRA 11 /* Zebra */ -#define RTPROT_BIRD 12 /* BIRD */ -#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ -#define RTPROT_XORP 14 /* XORP */ -#define RTPROT_NTK 15 /* Netsukuku */ -#define RTPROT_DHCP 16 /* DHCP client */ -#define RTPROT_MROUTED 17 /* Multicast daemon */ -#define RTPROT_BABEL 42 /* Babel daemon */ -#define RTPROT_BGP 186 /* BGP Routes */ -#define RTPROT_ISIS 187 /* ISIS Routes */ -#define RTPROT_OSPF 188 /* OSPF Routes */ -#define RTPROT_RIP 189 /* RIP Routes */ -#define RTPROT_EIGRP 192 /* EIGRP Routes */ +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ /* rtm_scope -- cgit v1.2.3 From 2464bc7c2897b7549146a743045089d3aea7b85b Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 23 Jun 2020 11:05:40 +0200 Subject: bridge: uapi: mrp: Fix MRP_PORT_ROLE Currently the MRP_PORT_ROLE_NONE has the value 0x2 but this is in conflict with the IEC 62439-2 standard. The standard defines the following port roles: primary (0x0), secondary(0x1), interconnect(0x2). Therefore remove the port role none. Fixes: 4714d13791f831 ("bridge: uapi: mrp: Add mrp attributes.") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/mrp_bridge.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index 84f15f48a7cb..bee366540212 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -36,7 +36,6 @@ enum br_mrp_port_state_type { enum br_mrp_port_role_type { BR_MRP_PORT_ROLE_PRIMARY, BR_MRP_PORT_ROLE_SECONDARY, - BR_MRP_PORT_ROLE_NONE, }; enum br_mrp_tlv_header_type { -- cgit v1.2.3 From bdb7b79b4ce864a724250e1d35948c46f135de36 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 22 Jun 2020 20:22:21 -0700 Subject: bpf: Switch most helper return values from 32-bit int to 64-bit long Switch most of BPF helper definitions from returning int to long. These definitions are coming from comments in BPF UAPI header and are used to generate bpf_helper_defs.h (under libbpf) to be later included and used from BPF programs. In actual in-kernel implementation, all the helpers are defined as returning u64, but due to some historical reasons, most of them are actually defined as returning int in UAPI (usually, to return 0 on success, and negative value on error). This actually causes Clang to quite often generate sub-optimal code, because compiler believes that return value is 32-bit, and in a lot of cases has to be up-converted (usually with a pair of 32-bit bit shifts) to 64-bit values, before they can be used further in BPF code. Besides just "polluting" the code, these 32-bit shifts quite often cause problems for cases in which return value matters. This is especially the case for the family of bpf_probe_read_str() functions. There are few other similar helpers (e.g., bpf_read_branch_records()), in which return value is used by BPF program logic to record variable-length data and process it. For such cases, BPF program logic carefully manages offsets within some array or map to read variable-length data. For such uses, it's crucial for BPF verifier to track possible range of register values to prove that all the accesses happen within given memory bounds. Those extraneous zero-extending bit shifts, inserted by Clang (and quite often interleaved with other code, which makes the issues even more challenging and sometimes requires employing extra per-variable compiler barriers), throws off verifier logic and makes it mark registers as having unknown variable offset. We'll study this pattern a bit later below. Another common pattern is to check return of BPF helper for non-zero state to detect error conditions and attempt alternative actions in such case. Even in this simple and straightforward case, this 32-bit vs BPF's native 64-bit mode quite often leads to sub-optimal and unnecessary extra code. We'll look at this pattern as well. Clang's BPF target supports two modes of code generation: ALU32, in which it is capable of using lower 32-bit parts of registers, and no-ALU32, in which only full 64-bit registers are being used. ALU32 mode somewhat mitigates the above described problems, but not in all cases. This patch switches all the cases in which BPF helpers return 0 or negative error from returning int to returning long. It is shown below that such change in definition leads to equivalent or better code. No-ALU32 mode benefits more, but ALU32 mode doesn't degrade or still gets improved code generation. Another class of cases switched from int to long are bpf_probe_read_str()-like helpers, which encode successful case as non-negative values, while still returning negative value for errors. In all of such cases, correctness is preserved due to two's complement encoding of negative values and the fact that all helpers return values with 32-bit absolute value. Two's complement ensures that for negative values higher 32 bits are all ones and when truncated, leave valid negative 32-bit value with the same value. Non-negative values have upper 32 bits set to zero and similarly preserve value when high 32 bits are truncated. This means that just casting to int/u32 is correct and efficient (and in ALU32 mode doesn't require any extra shifts). To minimize the chances of regressions, two code patterns were investigated, as mentioned above. For both patterns, BPF assembly was analyzed in ALU32/NO-ALU32 compiler modes, both with current 32-bit int return type and new 64-bit long return type. Case 1. Variable-length data reading and concatenation. This is quite ubiquitous pattern in tracing/monitoring applications, reading data like process's environment variables, file path, etc. In such case, many pieces of string-like variable-length data are read into a single big buffer, and at the end of the process, only a part of array containing actual data is sent to user-space for further processing. This case is tested in test_varlen.c selftest (in the next patch). Code flow is roughly as follows: void *payload = &sample->payload; u64 len; len = bpf_probe_read_kernel_str(payload, MAX_SZ1, &source_data1); if (len <= MAX_SZ1) { payload += len; sample->len1 = len; } len = bpf_probe_read_kernel_str(payload, MAX_SZ2, &source_data2); if (len <= MAX_SZ2) { payload += len; sample->len2 = len; } /* and so on */ sample->total_len = payload - &sample->payload; /* send over, e.g., perf buffer */ There could be two variations with slightly different code generated: when len is 64-bit integer and when it is 32-bit integer. Both variations were analysed. BPF assembly instructions between two successive invocations of bpf_probe_read_kernel_str() were used to check code regressions. Results are below, followed by short analysis. Left side is using helpers with int return type, the right one is after the switch to long. ALU32 + INT ALU32 + LONG =========== ============ 64-BIT (13 insns): 64-BIT (10 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: if w0 > 256 goto +9 18: if r0 > 256 goto +6 19: w1 = w0 19: r1 = 0 ll 20: r1 <<= 32 21: *(u64 *)(r1 + 0) = r0 21: r1 s>>= 32 22: r6 = 0 ll 22: r2 = 0 ll 24: r6 += r0 24: *(u64 *)(r2 + 0) = r1 00000000000000c8 : 25: r6 = 0 ll 25: r1 = r6 27: r6 += r1 26: w2 = 256 00000000000000e0 : 27: r3 = 0 ll 28: r1 = r6 29: call 115 29: w2 = 256 30: r3 = 0 ll 32: call 115 32-BIT (11 insns): 32-BIT (12 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: if w0 > 256 goto +7 18: if w0 > 256 goto +8 19: r1 = 0 ll 19: r1 = 0 ll 21: *(u32 *)(r1 + 0) = r0 21: *(u32 *)(r1 + 0) = r0 22: w1 = w0 22: r0 <<= 32 23: r6 = 0 ll 23: r0 >>= 32 25: r6 += r1 24: r6 = 0 ll 00000000000000d0 : 26: r6 += r0 26: r1 = r6 00000000000000d8 : 27: w2 = 256 27: r1 = r6 28: r3 = 0 ll 28: w2 = 256 30: call 115 29: r3 = 0 ll 31: call 115 In ALU32 mode, the variant using 64-bit length variable clearly wins and avoids unnecessary zero-extension bit shifts. In practice, this is even more important and good, because BPF code won't need to do extra checks to "prove" that payload/len are within good bounds. 32-bit len is one instruction longer. Clang decided to do 64-to-32 casting with two bit shifts, instead of equivalent `w1 = w0` assignment. The former uses extra register. The latter might potentially lose some range information, but not for 32-bit value. So in this case, verifier infers that r0 is [0, 256] after check at 18:, and shifting 32 bits left/right keeps that range intact. We should probably look into Clang's logic and see why it chooses bitshifts over sub-register assignments for this. NO-ALU32 + INT NO-ALU32 + LONG ============== =============== 64-BIT (14 insns): 64-BIT (10 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: r0 <<= 32 18: if r0 > 256 goto +6 19: r1 = r0 19: r1 = 0 ll 20: r1 >>= 32 21: *(u64 *)(r1 + 0) = r0 21: if r1 > 256 goto +7 22: r6 = 0 ll 22: r0 s>>= 32 24: r6 += r0 23: r1 = 0 ll 00000000000000c8 : 25: *(u64 *)(r1 + 0) = r0 25: r1 = r6 26: r6 = 0 ll 26: r2 = 256 28: r6 += r0 27: r3 = 0 ll 00000000000000e8 : 29: call 115 29: r1 = r6 30: r2 = 256 31: r3 = 0 ll 33: call 115 32-BIT (13 insns): 32-BIT (13 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: r1 = r0 18: r1 = r0 19: r1 <<= 32 19: r1 <<= 32 20: r1 >>= 32 20: r1 >>= 32 21: if r1 > 256 goto +6 21: if r1 > 256 goto +6 22: r2 = 0 ll 22: r2 = 0 ll 24: *(u32 *)(r2 + 0) = r0 24: *(u32 *)(r2 + 0) = r0 25: r6 = 0 ll 25: r6 = 0 ll 27: r6 += r1 27: r6 += r1 00000000000000e0 : 00000000000000e0 : 28: r1 = r6 28: r1 = r6 29: r2 = 256 29: r2 = 256 30: r3 = 0 ll 30: r3 = 0 ll 32: call 115 32: call 115 In NO-ALU32 mode, for the case of 64-bit len variable, Clang generates much superior code, as expected, eliminating unnecessary bit shifts. For 32-bit len, code is identical. So overall, only ALU-32 32-bit len case is more-or-less equivalent and the difference stems from internal Clang decision, rather than compiler lacking enough information about types. Case 2. Let's look at the simpler case of checking return result of BPF helper for errors. The code is very simple: long bla; if (bpf_probe_read_kenerl(&bla, sizeof(bla), 0)) return 1; else return 0; ALU32 + CHECK (9 insns) ALU32 + CHECK (9 insns) ==================================== ==================================== 0: r1 = r10 0: r1 = r10 1: r1 += -8 1: r1 += -8 2: w2 = 8 2: w2 = 8 3: r3 = 0 3: r3 = 0 4: call 113 4: call 113 5: w1 = w0 5: r1 = r0 6: w0 = 1 6: w0 = 1 7: if w1 != 0 goto +1 7: if r1 != 0 goto +1 8: w0 = 0 8: w0 = 0 0000000000000048 : 0000000000000048 : 9: exit 9: exit Almost identical code, the only difference is the use of full register assignment (r1 = r0) vs half-registers (w1 = w0) in instruction #5. On 32-bit architectures, new BPF assembly might be slightly less optimal, in theory. But one can argue that's not a big issue, given that use of full registers is still prevalent (e.g., for parameter passing). NO-ALU32 + CHECK (11 insns) NO-ALU32 + CHECK (9 insns) ==================================== ==================================== 0: r1 = r10 0: r1 = r10 1: r1 += -8 1: r1 += -8 2: r2 = 8 2: r2 = 8 3: r3 = 0 3: r3 = 0 4: call 113 4: call 113 5: r1 = r0 5: r1 = r0 6: r1 <<= 32 6: r0 = 1 7: r1 >>= 32 7: if r1 != 0 goto +1 8: r0 = 1 8: r0 = 0 9: if r1 != 0 goto +1 0000000000000048 : 10: r0 = 0 9: exit 0000000000000058 : 11: exit NO-ALU32 is a clear improvement, getting rid of unnecessary zero-extension bit shifts. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200623032224.4020118-1-andriin@fb.com --- include/uapi/linux/bpf.h | 192 +++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 96 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 19684813faae..be0efee49093 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -653,7 +653,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +671,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +695,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +775,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +792,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +880,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +916,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +953,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +969,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +981,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1032,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1098,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1145,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1190,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1207,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1276,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1294,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1304,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1331,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1358,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1389,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1408,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1420,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1444,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes @@ -1500,7 +1500,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1547,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1595,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1630,7 +1630,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1676,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1697,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1708,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1727,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1756,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,7 +1806,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For en eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1842,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1867,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1911,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +1925,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1959,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1977,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2008,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2026,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2040,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2056,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2089,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2111,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be @@ -2142,7 +2142,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2161,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2175,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2189,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2226,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2241,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2257,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2286,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2305,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2370,7 @@ union bpf_attr { * Return * A pointer to the local storage area. * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2471,7 +2471,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2479,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2489,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2517,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2529,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2543,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +2591,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2614,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2651,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2666,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2682,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into provided @@ -2701,7 +2701,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2718,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2735,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2759,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2810,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2818,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2859,7 +2859,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2883,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2941,7 +2941,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +2949,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2976,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +2995,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +3007,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,7 +3062,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, @@ -3097,7 +3097,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3126,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3221,7 +3221,7 @@ union bpf_attr { * Return * Requested value, or 0, if flags are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform -- cgit v1.2.3 From bcc7f554cfa7e0ac77c7adc4027c16f4a2f99c6f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jun 2020 16:39:35 +0100 Subject: bpf: Fix formatting in documentation for BPF helpers When producing the bpf-helpers.7 man page from the documentation from the BPF user space header file, rst2man complains: :2636: (ERROR/3) Unexpected indentation. :2640: (WARNING/2) Block quote ends without a blank line; unexpected unindent. Let's fix formatting for the relevant chunk (item list in bpf_ringbuf_query()'s description), and for a couple other functions. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623153935.6215-1-quentin@isovalent.com --- include/uapi/linux/bpf.h | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 974a71342aea..8bd33050b7bb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3171,13 +3171,12 @@ union bpf_attr { * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return - * 0, on success; - * < 0, on error. + * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description @@ -3189,20 +3188,20 @@ union bpf_attr { * void bpf_ringbuf_submit(void *data, u64 flags) * Description * Submit reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * * void bpf_ringbuf_discard(void *data, u64 flags) * Description * Discard reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * @@ -3210,16 +3209,18 @@ union bpf_attr { * Description * Query various characteristics of provided ring buffer. What * exactly is queries is determined by *flags*: - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; - * - BPF_RB_RING_SIZE - the size of ring buffer; - * - BPF_RB_CONS_POS - consumer position (can wrap around); - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); - * Data returned is just a momentary snapshots of actual values + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * + * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * Return - * Requested value, or 0, if flags are not recognized. + * Requested value, or 0, if *flags* are not recognized. * * int bpf_csum_level(struct sk_buff *skb, u64 level) * Description -- cgit v1.2.3 From 428d2459cceb77357b81c242ca22462a6a904817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Sat, 30 May 2020 14:39:12 +0200 Subject: xfrm: introduce oseq-may-wrap flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 4303 in section 3.3.3 suggests to disable anti-replay for manually distributed ICVs in which case the sender does not need to monitor or reset the counter. However, the sender still increments the counter and when it reaches the maximum value, the counter rolls over back to zero. This patch introduces new extra_flag XFRM_SA_XFLAG_OSEQ_MAY_WRAP which allows sequence number to cycle in outbound packets if set. This flag is used only in legacy and bmp code, because esn should not be negotiated if anti-replay is disabled (see note in 3.3.3 section). Signed-off-by: Petr Vaněk Acked-by: Christophe Gouault Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ff7cfdc6cb44..ffc6a5391bb7 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -387,6 +387,7 @@ struct xfrm_usersa_info { }; #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +#define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 struct xfrm_usersa_id { xfrm_address_t daddr; -- cgit v1.2.3 From 0b8975bdc0cc5310d48d9bdd871cefebe1f94c99 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 3 Jun 2020 10:27:48 -0700 Subject: dmaengine: idxd: fix hw descriptor fields for delta record Fix the hw descriptor fields for delta record in user exported idxd.h header. Missing the "expected result mask" field. Reported-by: Mona Hossain Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/159120526866.65385.536565786678052944.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1f412fbf561b..e103c1434e4b 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -110,9 +110,12 @@ struct dsa_hw_desc { uint16_t rsvd1; union { uint8_t expected_res; + /* create delta record */ struct { uint64_t delta_addr; uint32_t max_delta_size; + uint32_t delt_rsvd; + uint8_t expected_res_mask; }; uint32_t delta_rec_size; uint64_t dest2; -- cgit v1.2.3 From 62fb45d317c5fa08e4db093441835bb6f33acbd7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Jun 2020 16:42:06 +0200 Subject: USB: ch9: add "USB_" prefix in front of TEST defines For some reason, the TEST_ defines in the usb/ch9.h files did not have the USB_ prefix on it, making it a bit confusing when reading the file, as well as not the nicest thing to do in a uapi file. So fix that up and add the USB_ prefix on to them, and fix up all in-kernel usages. This included deleting the duplicate copy in the net2272.h file. Cc: Felipe Balbi Cc: Michal Simek Cc: Mathias Nyman Cc: Pawel Laszczak Cc: YueHaibing Cc: Nathan Chancellor Cc: Jason Yan Cc: Jia-Ju Bai Cc: Stephen Boyd Cc: Christophe JAILLET Cc: Arnd Bergmann Cc: Jules Irenge Cc: Alan Stern Cc: Thinh Nguyen Cc: Rob Gill Cc: Macpaul Lin Acked-by: Minas Harutyunyan Acked-by: Bin Liu Acked-by: Chunfeng Yun Acked-by: Peter Chen Link: https://lore.kernel.org/r/20200618144206.2655890-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/ch9.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index b1ed2ccfe9cf..48766fdf6580 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -138,11 +138,11 @@ * Test Mode Selectors * See USB 2.0 spec Table 9-7 */ -#define TEST_J 1 -#define TEST_K 2 -#define TEST_SE0_NAK 3 -#define TEST_PACKET 4 -#define TEST_FORCE_EN 5 +#define USB_TEST_J 1 +#define USB_TEST_K 2 +#define USB_TEST_SE0_NAK 3 +#define USB_TEST_PACKET 4 +#define USB_TEST_FORCE_ENABLE 5 /* Status Type */ #define USB_STATUS_TYPE_STANDARD 0 -- cgit v1.2.3 From f9bcf96837f158db6ea982d15cd2c8161ca6bc23 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:52 +0300 Subject: bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt This patch adds support of SO_KEEPALIVE flag and TCP related options to bpf_setsockopt() routine. This is helpful if we want to enable or tune TCP keepalive for applications which don't do it in the userspace code. v3: - update kernel-doc in uapi (Nikita Vetoshkin ) v4: - update kernel-doc in tools too (Alexei Starovoitov) - add test to selftests (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9d3923e6b860..d9737d51dd19 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1621,10 +1621,13 @@ union bpf_attr { * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. + * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, + * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return -- cgit v1.2.3 From dc5bdb68b5b369d5bc7d1de96fa64cc1737a6320 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 24 Jun 2020 11:29:10 +0200 Subject: drm/fb-helper: Fix vt restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past we had a pile of hacks to orchestrate access between fbdev emulation and native kms clients. We've tried to streamline this, by always preferring the kms side above fbdev calls when a drm master exists, because drm master controls access to the display resources. Unfortunately this breaks existing userspace, specifically Xorg. When exiting Xorg first restores the console to text mode using the KDSET ioctl on the vt. This does nothing, because a drm master is still around. Then it drops the drm master status, which again does nothing, because logind is keeping additional drm fd open to be able to orchestrate vt switches. In the past this is the point where fbdev was restored, as part of the ->lastclose hook on the drm side. Now to fix this regression we don't want to go back to letting fbdev restore things whenever it feels like, or to the pile of hacks we've had before. Instead try and go with a minimal exception to make the KDSET case work again, and nothing else. This means that if userspace does a KDSET call when switching between graphical compositors, there will be some flickering with fbcon showing up for a bit. But a) that's not a regression and b) userspace can fix it by improving the vt switching dance - logind should have all the information it needs. While pondering all this I'm also wondering wheter we should have a SWITCH_MASTER ioctl to allow race-free master status handover. But that's for another day. v2: Somehow forgot to cc all the fbdev people. v3: Fix typo Alex spotted. Reviewed-by: Alex Deucher Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208179 Cc: shlomo@fastmail.com Reported-and-Tested-by: shlomo@fastmail.com Cc: Michel Dänzer Fixes: 64914da24ea9 ("drm/fbdev-helper: don't force restores") Cc: Noralf Trønnes Cc: Thomas Zimmermann Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.7+ Cc: Bartlomiej Zolnierkiewicz Cc: Geert Uytterhoeven Cc: Nathan Chancellor Cc: Qiujun Huang Cc: Peter Rosin Cc: linux-fbdev@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200624092910.3280448-1-daniel.vetter@ffwll.ch --- include/uapi/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index b6aac7ee1f67..4c14e8be7267 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -205,6 +205,7 @@ struct fb_bitfield { #define FB_ACTIVATE_ALL 64 /* change all VCs on this fb */ #define FB_ACTIVATE_FORCE 128 /* force apply even when no change*/ #define FB_ACTIVATE_INV_MODE 256 /* invalidate videomode */ +#define FB_ACTIVATE_KD_TEXT 512 /* for KDSET vt ioctl */ #define FB_ACCELF_TEXT 1 /* (OBSOLETE) see fb_info.flags and vc_mode */ -- cgit v1.2.3 From 899426b3bdd947541ba4af8c767575889c8b842a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:16 +0300 Subject: net: neighbor: add fdb extended attribute Add an attribute to NDA which will contain all future fdb-specific attributes in order to avoid polluting the NDA namespace with e.g. bridge or vxlan specific attributes. The attribute is called NDA_FDB_EXT_ATTRS and the structure would look like: [NDA_FDB_EXT_ATTRS] = { [NFEA_xxx] } Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index eefcda8ca44e..540ff48402a1 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -30,6 +30,7 @@ enum { NDA_SRC_VNI, NDA_PROTOCOL, /* Originator of entry */ NDA_NH_ID, + NDA_FDB_EXT_ATTRS, __NDA_MAX }; @@ -172,4 +173,15 @@ enum { }; #define NDTA_MAX (__NDTA_MAX - 1) +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * ... + * } + */ +enum { + NFEA_UNSPEC, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + #endif -- cgit v1.2.3 From 31cbc39b6344916c20452e43a9171009214c409c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:17 +0300 Subject: net: bridge: add option to allow activity notifications for any fdb entries This patch adds the ability to notify about activity of any entries (static, permanent or ext_learn). EVPN multihoming peers need it to properly and efficiently handle mac sync (peer active/locally active). We add a new NFEA_ACTIVITY_NOTIFY attribute which is used to dump the current activity state and to control if static entries should be monitored at all. We use 2 bits - one to activate fdb entry tracking (disabled by default) and the second to denote that an entry is inactive. We need the second bit in order to avoid multiple notifications of inactivity. Obviously this makes no difference for dynamic entries since at the time of inactivity they get deleted, while the tracked non-dynamic entries get the inactive bit set and get a notification. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 540ff48402a1..21e569297355 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -173,13 +173,24 @@ enum { }; #define NDTA_MAX (__NDTA_MAX - 1) + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + /* embedded into NDA_FDB_EXT_ATTRS: * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] * ... * } */ enum { NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, __NFEA_MAX }; #define NFEA_MAX (__NFEA_MAX - 1) -- cgit v1.2.3 From b5f1d9ec283bd28a452cf61d7e5c2f2b1a9cccda Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:18 +0300 Subject: net: bridge: add a flag to avoid refreshing fdb when changing/adding When we modify or create a new fdb entry sometimes we want to avoid refreshing its activity in order to track it properly. One example is when a mac is received from EVPN multi-homing peer by FRR, which doesn't want to change local activity accounting. It makes it static and sets a flag to track its activity. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 21e569297355..dc8b72201f6c 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -191,6 +191,7 @@ enum { enum { NFEA_UNSPEC, NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, __NFEA_MAX }; #define NFEA_MAX (__NFEA_MAX - 1) -- cgit v1.2.3 From af7ec13833619e17f03aa73a785a2f871da6d66b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:09 -0700 Subject: bpf: Add bpf_skc_to_tcp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a tcp6_sock pointer. The return value could be NULL if the casting is illegal. A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added so the verifier is able to deduce proper return types for the helper. Different from the previous BTF_ID based helpers, the bpf_skc_to_tcp6_sock() argument can be several possible btf_ids. More specifically, all possible socket data structures with sock_common appearing in the first in the memory layout. This patch only added socket types related to tcp and udp. All possible argument btf_id and return value btf_id for helper bpf_skc_to_tcp6_sock() are pre-calculcated and cached. In the future, it is even possible to precompute these btf_id's at kernel build time. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230809.3988195-1-yhs@fb.com --- include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d9737d51dd19..e90ad07b291a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3255,6 +3255,12 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3392,7 +3398,8 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 478cfbdf5f13dfe09cfd0b1cbac821f5e27f6108 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:11 -0700 Subject: bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers Three more helpers are added to cast a sock_common pointer to an tcp_sock, tcp_timewait_sock or a tcp_request_sock for tracing programs. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230811.3988277-1-yhs@fb.com --- include/uapi/linux/bpf.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e90ad07b291a..b9412ab275f3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3261,6 +3261,24 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3399,7 +3417,10 @@ union bpf_attr { FN(ringbuf_discard), \ FN(ringbuf_query), \ FN(csum_level), \ - FN(skc_to_tcp6_sock), + FN(skc_to_tcp6_sock), \ + FN(skc_to_tcp_sock), \ + FN(skc_to_tcp_timewait_sock), \ + FN(skc_to_tcp_request_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 0d4fad3e57df2bf61e8ffc8d12a34b1caf9b8835 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:15 -0700 Subject: bpf: Add bpf_skc_to_udp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a udp6_sock pointer. The return value could be NULL if the casting is illegal. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20200623230815.3988481-1-yhs@fb.com --- include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b9412ab275f3..0cb8ec948816 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3279,6 +3279,12 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3420,7 +3426,8 @@ union bpf_attr { FN(skc_to_tcp6_sock), \ FN(skc_to_tcp_sock), \ FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), + FN(skc_to_tcp_request_sock), \ + FN(skc_to_udp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 4c342f778fe234e0c2a2601d87fec8ba42f0d2c6 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Thu, 25 Jun 2020 13:46:00 -0700 Subject: rds: transport module should be auto loaded when transport is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enhancement auto loads transport module when the transport is set via SO_RDS_TRANSPORT socket option. Reviewed-by: Ka-Cheong Poon Reviewed-by: Håkon Bugge Signed-off-by: Rao Shoaib Signed-off-by: Somasundaram Krishnasamy Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index cba368e55863..c21edb966c19 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -64,10 +64,12 @@ /* supported values for SO_RDS_TRANSPORT */ #define RDS_TRANS_IB 0 -#define RDS_TRANS_IWARP 1 +#define RDS_TRANS_GAP 1 #define RDS_TRANS_TCP 2 #define RDS_TRANS_COUNT 3 #define RDS_TRANS_NONE (~0) +/* don't use RDS_TRANS_IWARP - it is deprecated */ +#define RDS_TRANS_IWARP RDS_TRANS_GAP /* IOCTLS commands for SOL_RDS */ #define SIOCRDSSETTOS (SIOCPROTOPRIVATE) -- cgit v1.2.3 From bccb48c89fe3c09f1cbb7c8612e31f5daa1d4541 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 1 Jun 2020 20:13:21 +0200 Subject: batman-adv: Fix typos and grammar in documentation Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 50 +++++++++++++++++++------------------- include/uapi/linux/batman_adv.h | 4 +-- 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 0ae34c85ef9e..9c8604c5b5f6 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -72,8 +72,8 @@ enum batadv_subtype { /** * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets - * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was - * previously received from someone else than the best neighbor. + * @BATADV_NOT_BEST_NEXT_HOP: flag is set when the ogm packet is forwarded and + * was previously received from someone other than the best neighbor. * @BATADV_PRIMARIES_FIRST_HOP: flag unused. * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a * one hop neighbor on the interface where it was originally received. @@ -195,8 +195,8 @@ struct batadv_bla_claim_dst { /** * struct batadv_ogm_packet - ogm (routing protocol) packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @flags: contains routing relevant flags - see enum batadv_iv_flags * @seqno: sequence identification * @orig: address of the source node @@ -247,7 +247,7 @@ struct batadv_ogm2_packet { /** * struct batadv_elp_packet - elp (neighbor discovery) packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header + * @version: batman-adv protocol version, part of the general header * @orig: originator mac address * @seqno: sequence number * @elp_interval: currently used ELP sending interval in ms @@ -265,15 +265,15 @@ struct batadv_elp_packet { /** * struct batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier * @align: not used - useful for alignment purposes only * - * This structure is used for ICMP packets parsing only and it is never sent + * This structure is used for ICMP packet parsing only and it is never sent * over the wire. The alignment field at the end is there to ensure that * members are padded the same way as they are in real packets. */ @@ -291,8 +291,8 @@ struct batadv_icmp_header { /** * struct batadv_icmp_packet - ICMP packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node @@ -315,8 +315,8 @@ struct batadv_icmp_packet { /** * struct batadv_icmp_tp_packet - ICMP TP Meter packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node @@ -358,8 +358,8 @@ enum batadv_icmp_tp_subtype { /** * struct batadv_icmp_packet_rr - ICMP RouteRecord packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node @@ -397,8 +397,8 @@ struct batadv_icmp_packet_rr { /** * struct batadv_unicast_packet - unicast packet for network payload * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @ttvn: translation table version number * @dest: originator destination of the unicast packet */ @@ -433,8 +433,8 @@ struct batadv_unicast_4addr_packet { /** * struct batadv_frag_packet - fragmented packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence @@ -467,8 +467,8 @@ struct batadv_frag_packet { /** * struct batadv_bcast_packet - broadcast packet for network payload * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @reserved: reserved byte for alignment * @seqno: sequence identification * @orig: originator of the broadcast packet @@ -488,10 +488,10 @@ struct batadv_bcast_packet { /** * struct batadv_coded_packet - network coded packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @first_source: original source of first included packet - * @first_orig_dest: original destinal of first included packet + * @first_orig_dest: original destination of first included packet * @first_crc: checksum of first included packet * @first_ttvn: tt-version number of first included packet * @second_ttl: ttl of second packet @@ -523,8 +523,8 @@ struct batadv_coded_packet { /** * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @reserved: reserved field (for packet alignment) * @src: address of the source * @dst: address of the destination diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 617c180ff0c8..8cf2ad11ead9 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -69,7 +69,7 @@ enum batadv_tt_client_flags { /** * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be - * part of the network but no nnode has already announced it + * part of the network but no node has already announced it */ BATADV_TT_CLIENT_TEMP = (1 << 11), }; @@ -131,7 +131,7 @@ enum batadv_gw_modes { /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */ BATADV_GW_MODE_CLIENT, - /** @BATADV_GW_MODE_SERVER: announce itself as gatway server */ + /** @BATADV_GW_MODE_SERVER: announce itself as gateway server */ BATADV_GW_MODE_SERVER, }; -- cgit v1.2.3 From 3bda14d09dc5789a895ab02b7dcfcec19b0a65b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 1 Jun 2020 22:35:22 +0200 Subject: batman-adv: Introduce a configurable per interface hop penalty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some setups multiple hard interfaces with similar link qualities or throughput values are available. But people have expressed the desire to consider one of them as a backup only. Some creative solutions are currently in use: Such people are configuring multiple batman-adv mesh/soft interfaces, wire them together with some veth pairs and then tune the hop penalty to achieve an effect similar to a tunable per interface hop penalty. This patch introduces a new, configurable, per hard interface hop penalty to simplify such setups. Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 8cf2ad11ead9..bb0ae945b36a 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -427,7 +427,8 @@ enum batadv_nl_attrs { /** * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied - * to an originator message's tq-field on every hop. + * to an originator message's tq-field on every hop and/or per + * hard interface */ BATADV_ATTR_HOP_PENALTY, -- cgit v1.2.3 From 322b598be4d9b9090cda560c4caab78704615ab4 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 16 Jun 2020 12:08:44 +0800 Subject: fpga: dfl: introduce interrupt trigger setting API FPGA user applications may be interested in interrupts generated by DFL features. For example, users can implement their own FPGA logics with interrupts enabled in AFU (Accelerated Function Unit, dynamic region of DFL based FPGA). So user applications need to be notified to handle these interrupts. In order to allow userspace applications to monitor interrupts, driver requires userspace to provide eventfds as interrupt notification channels. Applications then poll/select on the eventfds to get notified. This patch introduces a generic helper functions to do eventfds binding with given interrupts. Sub feature drivers are expected to use XXX_GET_IRQ_NUM to query irq info, and XXX_SET_IRQ to set eventfds for interrupts. This patch also introduces helper functions for these 2 ioctls. Signed-off-by: Luwei Kang Signed-off-by: Wu Hao Signed-off-by: Xu Yilun Signed-off-by: Tom Rix Reviewed-by: Marcelo Tosatti Acked-by: Wu Hao Signed-off-by: Moritz Fischer --- include/uapi/linux/fpga-dfl.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h index ec70a0746e59..7331350f3067 100644 --- a/include/uapi/linux/fpga-dfl.h +++ b/include/uapi/linux/fpga-dfl.h @@ -151,6 +151,19 @@ struct dfl_fpga_port_dma_unmap { #define DFL_FPGA_PORT_DMA_UNMAP _IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 4) +/** + * struct dfl_fpga_irq_set - the argument for DFL_FPGA_XXX_SET_IRQ ioctl. + * + * @start: Index of the first irq. + * @count: The number of eventfd handler. + * @evtfds: Eventfd handlers. + */ +struct dfl_fpga_irq_set { + __u32 start; + __u32 count; + __s32 evtfds[]; +}; + /* IOCTLs for FME file descriptor */ /** -- cgit v1.2.3 From fe80536acf8397827be77f9b8ada384b90e790d0 Mon Sep 17 00:00:00 2001 From: Martin Date: Sun, 28 Jun 2020 23:18:23 +0530 Subject: bareudp: Added attribute to enable & disable rx metadata collection Metadata need not be collected in receive if the packet from bareudp device is not targeted to openvswitch. Signed-off-by: Martin Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a009365ad67b..cc185a007ade 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -600,6 +600,7 @@ enum { IFLA_BAREUDP_ETHERTYPE, IFLA_BAREUDP_SRCPORT_MIN, IFLA_BAREUDP_MULTIPROTO_MODE, + IFLA_BAREUDP_RX_COLLECT_METADATA, __IFLA_BAREUDP_MAX }; -- cgit v1.2.3 From aee9caa03fc3c8b02f8f31824354d85f30e562e0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:28 +0300 Subject: net: sched: sch_red: Add qevents "early_drop" and "mark" In order to allow acting on dropped and/or ECN-marked packets, add two new qevents to the RED qdisc: "early_drop" and "mark". Filters attached at "early_drop" block are executed as packets are early-dropped, those attached at the "mark" block are executed as packets are ECN-marked. Two new attributes are introduced: TCA_RED_EARLY_DROP_BLOCK with the block index for the "early_drop" qevent, and TCA_RED_MARK_BLOCK for the "mark" qevent. Absence of these attributes signifies "don't care": no block is allocated in that case, or the existing blocks are left intact in case of the change callback. For purposes of offloading, blocks attached to these qevents appear with newly-introduced binder types, FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP and FLOW_BLOCK_BINDER_TYPE_RED_MARK. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a95f3ae7ab37..9e7c2c607845 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -257,6 +257,8 @@ enum { TCA_RED_STAB, TCA_RED_MAX_P, TCA_RED_FLAGS, /* bitfield32 */ + TCA_RED_EARLY_DROP_BLOCK, /* u32 */ + TCA_RED_MARK_BLOCK, /* u32 */ __TCA_RED_MAX, }; -- cgit v1.2.3 From ecc31c60240b9808a274befc5db6b8a249a6ade1 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 29 Jun 2020 23:46:16 +0300 Subject: ethtool: Add link extended state Currently, drivers can only tell whether the link is up/down using LINKSTATE_GET, but no additional information is given. Add attributes to LINKSTATE_GET command in order to allow drivers to expose the user more information in addition to link state to ease the debug process, for example, reason for link down state. Extended state consists of two attributes - link_ext_state and link_ext_substate. The idea is to avoid 'vendor specific' states in order to prevent drivers to use specific link_ext_state that can be in the future common link_ext_state. The substates allows drivers to add more information to the common link_ext_state. For example, vendor can expose 'Autoneg' as link_ext_state and add 'No partner detected during force mode' as link_ext_substate. If a driver cannot pinpoint the extended state with the substate accuracy, it is free to expose only the extended state and omit the substate attribute. Signed-off-by: Amit Cohen Reviewed-by: Jiri Pirko Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 70 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 2 ++ 2 files changed, 72 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f4662b3a9e1e..d1413538ef30 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -579,6 +579,76 @@ struct ethtool_pauseparam { __u32 tx_pause; }; +/** + * enum ethtool_link_ext_state - link extended state + */ +enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_STATE_NO_CABLE, + ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, + ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, + ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, + ETHTOOL_LINK_EXT_STATE_OVERHEAT, +}; + +/** + * enum ethtool_link_ext_substate_autoneg - more information in addition to + * ETHTOOL_LINK_EXT_STATE_AUTONEG. + */ +enum ethtool_link_ext_substate_autoneg { + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, +}; + +/** + * enum ethtool_link_ext_substate_link_training - more information in addition to + * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. + */ +enum ethtool_link_ext_substate_link_training { + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, +}; + +/** + * enum ethtool_link_ext_substate_logical_mismatch - more information in addition + * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. + */ +enum ethtool_link_ext_substate_link_logical_mismatch { + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, +}; + +/** + * enum ethtool_link_ext_substate_bad_signal_integrity - more information in + * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. + */ +enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, +}; + +/** + * enum ethtool_link_ext_substate_cable_issue - more information in + * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. + */ +enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, +}; + #define ETH_GSTRING_LEN 32 /** diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 4dda5e4244a7..c12ce4df4b6b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -236,6 +236,8 @@ enum { ETHTOOL_A_LINKSTATE_LINK, /* u8 */ ETHTOOL_A_LINKSTATE_SQI, /* u32 */ ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ + ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ /* add new constants above here */ __ETHTOOL_A_LINKSTATE_CNT, -- cgit v1.2.3 From 970471914c67b70df24def6b2a30cc42acbebded Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 16 Jun 2020 16:47:14 +0200 Subject: iommu: Allow page responses without PASID Some PCIe devices do not expect a PASID value in PRI Page Responses. If the "PRG Response PASID Required" bit in the PRI capability is zero, then the OS should not set the PASID field. Similarly on Arm SMMU, responses to stall events do not have a PASID. Currently iommu_page_response() systematically checks that the PASID in the page response corresponds to the one in the page request. This can't work with virtualization because a page response coming from a guest OS won't have a PASID if the passed-through device does not require one. Add a flag to page requests that declares whether the corresponding response needs to have a PASID. When this flag isn't set, allow page responses without PASID. Reported-by: Shameerali Kolothum Thodi Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200616144712.748818-1-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index e907b7091a46..c2b2caf9ed41 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -81,7 +81,10 @@ struct iommu_fault_unrecoverable { /** * struct iommu_fault_page_request - Page Request data * @flags: encodes whether the corresponding fields are valid and whether this - * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values) + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). + * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response + * must have the same PASID value as the page request. When it is clear, + * the page response should not have a PASID. * @pasid: Process Address Space ID * @grpid: Page Request Group Index * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) @@ -92,6 +95,7 @@ struct iommu_fault_page_request { #define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) #define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) #define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) +#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) __u32 flags; __u32 pasid; __u32 grpid; -- cgit v1.2.3 From fa28dcb82a38f8e3993b0fae9106b1a80b59e4f0 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Jun 2020 23:28:44 -0700 Subject: bpf: Introduce helper bpf_get_task_stack() Introduce helper bpf_get_task_stack(), which dumps stack trace of given task. This is different to bpf_get_stack(), which gets stack track of current task. One potential use case of bpf_get_task_stack() is to call it from bpf_iter__task and dump all /proc//stack to a seq_file. bpf_get_task_stack() uses stack_trace_save_tsk() instead of get_perf_callchain() for kernel stack. The benefit of this choice is that stack_trace_save_tsk() doesn't require changes in arch/. The downside of using stack_trace_save_tsk() is that stack_trace_save_tsk() dumps the stack trace to unsigned long array. For 32-bit systems, we need to translate it to u64 array. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com --- include/uapi/linux/bpf.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0cb8ec948816..da9bf35a26f8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3285,6 +3285,39 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *task*, which is a valid + * pointer to struct task_struct. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_task_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3427,7 +3460,9 @@ union bpf_attr { FN(skc_to_tcp_sock), \ FN(skc_to_tcp_timewait_sock), \ FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), + FN(skc_to_udp6_sock), \ + FN(get_task_stack), \ + /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From e4266b991fead8eb996688e82ff39f6cc59ef7dd Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 2 Jul 2020 10:13:05 +0200 Subject: bridge: uapi: mrp: Extend MRP attributes to get the status Add MRP attribute IFLA_BRIDGE_MRP_INFO to allow the userspace to get the current state of the MRP instances. This is a nested attribute that contains other attributes like, ring id, index of primary and secondary port, priority, ring state, ring role. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index caa6914a3e53..c114c1c2bd53 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -166,6 +166,7 @@ enum { IFLA_BRIDGE_MRP_RING_STATE, IFLA_BRIDGE_MRP_RING_ROLE, IFLA_BRIDGE_MRP_START_TEST, + IFLA_BRIDGE_MRP_INFO, __IFLA_BRIDGE_MRP_MAX, }; @@ -228,6 +229,22 @@ enum { #define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1) +enum { + IFLA_BRIDGE_MRP_INFO_UNSPEC, + IFLA_BRIDGE_MRP_INFO_RING_ID, + IFLA_BRIDGE_MRP_INFO_P_IFINDEX, + IFLA_BRIDGE_MRP_INFO_S_IFINDEX, + IFLA_BRIDGE_MRP_INFO_PRIO, + IFLA_BRIDGE_MRP_INFO_RING_STATE, + IFLA_BRIDGE_MRP_INFO_RING_ROLE, + IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + __IFLA_BRIDGE_MRP_INFO_MAX, +}; + +#define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1) + struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; -- cgit v1.2.3 From 36a8e8e26542056bbd7eb5e047cadee30587d230 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 2 Jul 2020 10:13:07 +0200 Subject: bridge: Extend br_fill_ifinfo to return MPR status This patch extends the function br_fill_ifinfo to return also the MRP status for each instance on a bridge. It also adds a new filter RTEXT_FILTER_MRP to return the MRP status only when this is set, not to interfer with the vlans. The MRP status is return only on the bridge interfaces. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 879e64950a0a..9b814c92de12 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -778,6 +778,7 @@ enum { #define RTEXT_FILTER_BRVLAN (1 << 1) #define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) #define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) /* End of information exported to user level */ -- cgit v1.2.3 From 74cccc3d38438b346e40a4f8133cff3f0839ff84 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:11 +0200 Subject: netfilter: nf_tables: add NFTA_CHAIN_ID attribute This netlink attribute allows you to refer to chains inside a transaction as an alternative to the name and the handle. The chain binding support requires this new chain ID approach. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 4565456c0ef4..477779595b78 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -196,6 +196,7 @@ enum nft_table_attributes { * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) * @NFTA_CHAIN_FLAGS: chain flags + * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -209,6 +210,7 @@ enum nft_chain_attributes { NFTA_CHAIN_COUNTERS, NFTA_CHAIN_PAD, NFTA_CHAIN_FLAGS, + NFTA_CHAIN_ID, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -- cgit v1.2.3 From 837830a4b439bfeb86c70b0115c280377c84714b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:16 +0200 Subject: netfilter: nf_tables: add NFTA_RULE_CHAIN_ID attribute This new netlink attribute allows you to add rules to chains by the chain ID. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 477779595b78..2304d1b7ba5e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -240,6 +240,7 @@ enum nft_rule_attributes { NFTA_RULE_PAD, NFTA_RULE_ID, NFTA_RULE_POSITION_ID, + NFTA_RULE_CHAIN_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) -- cgit v1.2.3 From 51d70f181ff4e2c996ddf256af1efecd7d5864e5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:21 +0200 Subject: netfilter: nf_tables: add NFTA_VERDICT_CHAIN_ID attribute This netlink attribute allows you to identify the chain to jump/goto by means of the chain ID. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2304d1b7ba5e..683e75126d68 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -471,11 +471,13 @@ enum nft_data_attributes { * * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + * @NFTA_VERDICT_CHAIN_ID: jump target chain ID (NLA_U32) */ enum nft_verdict_attributes { NFTA_VERDICT_UNSPEC, NFTA_VERDICT_CODE, NFTA_VERDICT_CHAIN, + NFTA_VERDICT_CHAIN_ID, __NFTA_VERDICT_MAX }; #define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) -- cgit v1.2.3 From 67c49de4ad862c567088c5119cf125e566f56e7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:25 +0200 Subject: netfilter: nf_tables: expose enum nft_chain_flags through UAPI This enum definition was never exposed through UAPI. Rename NFT_BASE_CHAIN to NFT_CHAIN_BASE for consistency. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 683e75126d68..2cf7cc3b50c1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -184,6 +184,11 @@ enum nft_table_attributes { }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) +enum nft_chain_flags { + NFT_CHAIN_BASE = (1 << 0), + NFT_CHAIN_HW_OFFLOAD = (1 << 1), +}; + /** * enum nft_chain_attributes - nf_tables chain netlink attributes * -- cgit v1.2.3 From d0e2c7de92c7f2b3d355ad76b0bb9fc43d1beb87 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:36 +0200 Subject: netfilter: nf_tables: add NFT_CHAIN_BINDING This new chain flag specifies that: * the kernel dynamically allocates the chain name, if no chain name is specified. * If the immediate expression that refers to this chain is removed, then this bound chain (and its content) is destroyed. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2cf7cc3b50c1..e00b4ae6174e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -187,6 +187,7 @@ enum nft_table_attributes { enum nft_chain_flags { NFT_CHAIN_BASE = (1 << 0), NFT_CHAIN_HW_OFFLOAD = (1 << 1), + NFT_CHAIN_BINDING = (1 << 2), }; /** -- cgit v1.2.3 From c1f79a2eefdcc0aef5d7a911c27a3f75f1936ecd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 4 Jul 2020 02:51:28 +0200 Subject: netfilter: nf_tables: reject unsupported chain flags Bail out if userspace sends unsupported chain flags. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e00b4ae6174e..42f351c1f5c5 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -189,6 +189,9 @@ enum nft_chain_flags { NFT_CHAIN_HW_OFFLOAD = (1 << 1), NFT_CHAIN_BINDING = (1 << 2), }; +#define NFT_CHAIN_FLAGS (NFT_CHAIN_BASE | \ + NFT_CHAIN_HW_OFFLOAD | \ + NFT_CHAIN_BINDING) /** * enum nft_chain_attributes - nf_tables chain netlink attributes -- cgit v1.2.3 From ea1be1e59b19017e61aa357d524b743ba5602d3c Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 7 May 2020 20:03:35 +0800 Subject: serial: Remove duplicated macro definition of port type There exists the same macro definition of port type from 0 to 13 in include/uapi/linux/serial.h, remove these duplicated code in include/uapi/linux/serial_core.h which includes the former header. Acked-by: Jiri Slaby Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1588853015-28392-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 8ec3dd742ea4..851b982f8c4b 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -26,20 +26,6 @@ /* * The type definitions. These are from Ted Ts'o's serial.h */ -#define PORT_UNKNOWN 0 -#define PORT_8250 1 -#define PORT_16450 2 -#define PORT_16550 3 -#define PORT_16550A 4 -#define PORT_CIRRUS 5 -#define PORT_16650 6 -#define PORT_16650V2 7 -#define PORT_16750 8 -#define PORT_STARTECH 9 -#define PORT_16C950 10 -#define PORT_16654 11 -#define PORT_16850 12 -#define PORT_RSA 13 #define PORT_NS16550A 14 #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ -- cgit v1.2.3 From fe6a3d6521227e49857d0d6caabbdae3bd4aef89 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 16 Jun 2020 12:08:45 +0800 Subject: fpga: dfl: afu: add interrupt support for port error reporting Error reporting interrupt is very useful to notify users that some errors are detected by the hardware. Once users are notified, they could query hardware logged error states, no need to continuously poll on these states. This patch adds interrupt support for port error reporting sub feature. It follows the common DFL interrupt notification and handling mechanism, implements two ioctl commands below for user to query number of irqs supported, and set/unset interrupt triggers. Ioctls: * DFL_FPGA_PORT_ERR_GET_IRQ_NUM get the number of irqs, which is used to determine whether/how many interrupts error reporting feature supports. * DFL_FPGA_PORT_ERR_SET_IRQ set/unset given eventfds as error interrupt triggers. Signed-off-by: Luwei Kang Signed-off-by: Wu Hao Signed-off-by: Xu Yilun Reviewed-by: Marcelo Tosatti Acked-by: Wu Hao Signed-off-by: Moritz Fischer --- include/uapi/linux/fpga-dfl.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h index 7331350f3067..6c71c9d7e926 100644 --- a/include/uapi/linux/fpga-dfl.h +++ b/include/uapi/linux/fpga-dfl.h @@ -164,6 +164,29 @@ struct dfl_fpga_irq_set { __s32 evtfds[]; }; +/** + * DFL_FPGA_PORT_ERR_GET_IRQ_NUM - _IOR(DFL_FPGA_MAGIC, DFL_PORT_BASE + 5, + * __u32 num_irqs) + * + * Get the number of irqs supported by the fpga port error reporting private + * feature. Currently hardware supports up to 1 irq. + * Return: 0 on success, -errno on failure. + */ +#define DFL_FPGA_PORT_ERR_GET_IRQ_NUM _IOR(DFL_FPGA_MAGIC, \ + DFL_PORT_BASE + 5, __u32) + +/** + * DFL_FPGA_PORT_ERR_SET_IRQ - _IOW(DFL_FPGA_MAGIC, DFL_PORT_BASE + 6, + * struct dfl_fpga_irq_set) + * + * Set fpga port error reporting interrupt trigger if evtfds[n] is valid. + * Unset related interrupt trigger if evtfds[n] is a negative value. + * Return: 0 on success, -errno on failure. + */ +#define DFL_FPGA_PORT_ERR_SET_IRQ _IOW(DFL_FPGA_MAGIC, \ + DFL_PORT_BASE + 6, \ + struct dfl_fpga_irq_set) + /* IOCTLs for FME file descriptor */ /** -- cgit v1.2.3 From d43f20bae5173ba431526040c320c36fdd4f086d Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 16 Jun 2020 12:08:46 +0800 Subject: fpga: dfl: fme: add interrupt support for global error reporting Error reporting interrupt is very useful to notify users that some errors are detected by the hardware. Once users are notified, they could query hardware logged error states, no need to continuously poll on these states. This patch adds interrupt support for fme global error reporting sub feature. It follows the common DFL interrupt notification and handling mechanism. And it implements two ioctls below for user to query number of irqs supported, and set/unset interrupt triggers. Ioctls: * DFL_FPGA_FME_ERR_GET_IRQ_NUM get the number of irqs, which is used to determine whether/how many interrupts fme error reporting feature supports. * DFL_FPGA_FME_ERR_SET_IRQ set/unset given eventfds as fme error reporting interrupt triggers. Signed-off-by: Luwei Kang Signed-off-by: Wu Hao Signed-off-by: Xu Yilun Reviewed-by: Marcelo Tosatti Acked-by: Wu Hao Signed-off-by: Moritz Fischer --- include/uapi/linux/fpga-dfl.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h index 6c71c9d7e926..b6495ea412ec 100644 --- a/include/uapi/linux/fpga-dfl.h +++ b/include/uapi/linux/fpga-dfl.h @@ -230,4 +230,27 @@ struct dfl_fpga_fme_port_pr { */ #define DFL_FPGA_FME_PORT_ASSIGN _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 2, int) +/** + * DFL_FPGA_FME_ERR_GET_IRQ_NUM - _IOR(DFL_FPGA_MAGIC, DFL_FME_BASE + 3, + * __u32 num_irqs) + * + * Get the number of irqs supported by the fpga fme error reporting private + * feature. Currently hardware supports up to 1 irq. + * Return: 0 on success, -errno on failure. + */ +#define DFL_FPGA_FME_ERR_GET_IRQ_NUM _IOR(DFL_FPGA_MAGIC, \ + DFL_FME_BASE + 3, __u32) + +/** + * DFL_FPGA_FME_ERR_SET_IRQ - _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 4, + * struct dfl_fpga_irq_set) + * + * Set fpga fme error reporting interrupt trigger if evtfds[n] is valid. + * Unset related interrupt trigger if evtfds[n] is a negative value. + * Return: 0 on success, -errno on failure. + */ +#define DFL_FPGA_FME_ERR_SET_IRQ _IOW(DFL_FPGA_MAGIC, \ + DFL_FME_BASE + 4, \ + struct dfl_fpga_irq_set) + #endif /* _UAPI_LINUX_FPGA_DFL_H */ -- cgit v1.2.3 From 09d86150141955ba1b3d7cbef23785f4996e4d6f Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 16 Jun 2020 12:08:47 +0800 Subject: fpga: dfl: afu: add AFU interrupt support AFU (Accelerated Function Unit) is dynamic region of the DFL based FPGA, and always defined by users. Some DFL based FPGA cards allow users to implement their own interrupts in AFU. In order to support this, hardware implements a new UINT (AFU Interrupt) private feature with related capability register which describes the number of supported AFU interrupts as well as the local index of the interrupts for software enumeration, and from software side, driver follows the common DFL interrupt notification and handling mechanism, and it implements two ioctls below for user to query number of irqs supported and set/unset interrupt triggers. Ioctls: * DFL_FPGA_PORT_UINT_GET_IRQ_NUM get the number of irqs, which is used to determine how many interrupts UINT feature supports. * DFL_FPGA_PORT_UINT_SET_IRQ set/unset eventfds as AFU interrupt triggers. Signed-off-by: Luwei Kang Signed-off-by: Wu Hao Signed-off-by: Xu Yilun Reviewed-by: Marcelo Tosatti Acked-by: Wu Hao Signed-off-by: Moritz Fischer --- include/uapi/linux/fpga-dfl.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h index b6495ea412ec..1621b077bf21 100644 --- a/include/uapi/linux/fpga-dfl.h +++ b/include/uapi/linux/fpga-dfl.h @@ -187,6 +187,29 @@ struct dfl_fpga_irq_set { DFL_PORT_BASE + 6, \ struct dfl_fpga_irq_set) +/** + * DFL_FPGA_PORT_UINT_GET_IRQ_NUM - _IOR(DFL_FPGA_MAGIC, DFL_PORT_BASE + 7, + * __u32 num_irqs) + * + * Get the number of irqs supported by the fpga AFU interrupt private + * feature. + * Return: 0 on success, -errno on failure. + */ +#define DFL_FPGA_PORT_UINT_GET_IRQ_NUM _IOR(DFL_FPGA_MAGIC, \ + DFL_PORT_BASE + 7, __u32) + +/** + * DFL_FPGA_PORT_UINT_SET_IRQ - _IOW(DFL_FPGA_MAGIC, DFL_PORT_BASE + 8, + * struct dfl_fpga_irq_set) + * + * Set fpga AFU interrupt trigger if evtfds[n] is valid. + * Unset related interrupt trigger if evtfds[n] is a negative value. + * Return: 0 on success, -errno on failure. + */ +#define DFL_FPGA_PORT_UINT_SET_IRQ _IOW(DFL_FPGA_MAGIC, \ + DFL_PORT_BASE + 8, \ + struct dfl_fpga_irq_set) + /* IOCTLs for FME file descriptor */ /** -- cgit v1.2.3 From 1ce50e7d408ef2bdc8ca021363fd46d1b8bfad00 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 6 Jul 2020 12:55:37 +0200 Subject: thermal: core: genetlink support for events/cmd/sampling Initially the thermal framework had a very simple notification mechanism to send generic netlink messages to the userspace. The notification function was never called from anywhere and the corresponding dead code was removed. It was probably a first attempt to introduce the netlink notification. At LPC2018, the presentation "Linux thermal: User kernel interface", proposed to create the notifications to the userspace via a kfifo. The advantage of the kfifo is the performance. It is usually used from a 1:1 communication channel where a driver captures data and sends it as fast as possible to a userspace process. The drawback is that only one process uses the notification channel exclusively, thus no other process is allowed to use the channel to get temperature or notifications. This patch defines a generic netlink API to discover the current thermal setup and adds event notifications as well as temperature sampling. As any genetlink protocol, it can evolve and the versioning allows to keep the backward compatibility. In order to prevent the user from getting flooded with data on a single channel, there are two multicast channels, one for the temperature sampling when the thermal zone is updated and another one for the events, so the user can get the events only without the thermal zone temperature sampling. Also, a list of commands to discover the thermal setup is added and can be extended when needed. Reviewed-by: Amit Kucheria Signed-off-by: Daniel Lezcano Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200706105538.2159-3-daniel.lezcano@linaro.org --- include/uapi/linux/thermal.h | 89 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 96218378dda8..c105054cbb57 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -4,31 +4,86 @@ #define THERMAL_NAME_LENGTH 20 -/* Adding event notification support elements */ -#define THERMAL_GENL_FAMILY_NAME "thermal_event" -#define THERMAL_GENL_VERSION 0x01 -#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" - -/* Events supported by Thermal Netlink */ -enum events { - THERMAL_AUX0, - THERMAL_AUX1, - THERMAL_CRITICAL, - THERMAL_DEV_FAULT, +enum thermal_device_mode { + THERMAL_DEVICE_DISABLED = 0, + THERMAL_DEVICE_ENABLED, +}; + +enum thermal_trip_type { + THERMAL_TRIP_ACTIVE = 0, + THERMAL_TRIP_PASSIVE, + THERMAL_TRIP_HOT, + THERMAL_TRIP_CRITICAL, }; -/* attributes of thermal_genl_family */ -enum { +/* Adding event notification support elements */ +#define THERMAL_GENL_FAMILY_NAME "thermal" +#define THERMAL_GENL_VERSION 0x01 +#define THERMAL_GENL_SAMPLING_GROUP_NAME "sampling" +#define THERMAL_GENL_EVENT_GROUP_NAME "event" + +/* Attributes of thermal_genl_family */ +enum thermal_genl_attr { THERMAL_GENL_ATTR_UNSPEC, - THERMAL_GENL_ATTR_EVENT, + THERMAL_GENL_ATTR_TZ, + THERMAL_GENL_ATTR_TZ_ID, + THERMAL_GENL_ATTR_TZ_TEMP, + THERMAL_GENL_ATTR_TZ_TRIP, + THERMAL_GENL_ATTR_TZ_TRIP_ID, + THERMAL_GENL_ATTR_TZ_TRIP_TYPE, + THERMAL_GENL_ATTR_TZ_TRIP_TEMP, + THERMAL_GENL_ATTR_TZ_TRIP_HYST, + THERMAL_GENL_ATTR_TZ_MODE, + THERMAL_GENL_ATTR_TZ_NAME, + THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT, + THERMAL_GENL_ATTR_TZ_GOV, + THERMAL_GENL_ATTR_TZ_GOV_NAME, + THERMAL_GENL_ATTR_CDEV, + THERMAL_GENL_ATTR_CDEV_ID, + THERMAL_GENL_ATTR_CDEV_CUR_STATE, + THERMAL_GENL_ATTR_CDEV_MAX_STATE, + THERMAL_GENL_ATTR_CDEV_NAME, + THERMAL_GENL_ATTR_GOV_NAME, + __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) -/* commands supported by the thermal_genl_family */ -enum { +enum thermal_genl_sampling { + THERMAL_GENL_SAMPLING_TEMP, + __THERMAL_GENL_SAMPLING_MAX, +}; +#define THERMAL_GENL_SAMPLING_MAX (__THERMAL_GENL_SAMPLING_MAX - 1) + +/* Events of thermal_genl_family */ +enum thermal_genl_event { + THERMAL_GENL_EVENT_UNSPEC, + THERMAL_GENL_EVENT_TZ_CREATE, /* Thermal zone creation */ + THERMAL_GENL_EVENT_TZ_DELETE, /* Thermal zone deletion */ + THERMAL_GENL_EVENT_TZ_DISABLE, /* Thermal zone disabed */ + THERMAL_GENL_EVENT_TZ_ENABLE, /* Thermal zone enabled */ + THERMAL_GENL_EVENT_TZ_TRIP_UP, /* Trip point crossed the way up */ + THERMAL_GENL_EVENT_TZ_TRIP_DOWN, /* Trip point crossed the way down */ + THERMAL_GENL_EVENT_TZ_TRIP_CHANGE, /* Trip point changed */ + THERMAL_GENL_EVENT_TZ_TRIP_ADD, /* Trip point added */ + THERMAL_GENL_EVENT_TZ_TRIP_DELETE, /* Trip point deleted */ + THERMAL_GENL_EVENT_CDEV_ADD, /* Cdev bound to the thermal zone */ + THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ + THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ + THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ + __THERMAL_GENL_EVENT_MAX, +}; +#define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) + +/* Commands supported by the thermal_genl_family */ +enum thermal_genl_cmd { THERMAL_GENL_CMD_UNSPEC, - THERMAL_GENL_CMD_EVENT, + THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ + THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ + THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ + THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ + THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ + THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ __THERMAL_GENL_CMD_MAX, }; #define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) -- cgit v1.2.3 From f5836749c9c04a10decd2742845ad4870965fdef Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 6 Jul 2020 16:01:25 -0700 Subject: bpf: Add BPF_CGROUP_INET_SOCK_RELEASE hook Sometimes it's handy to know when the socket gets freed. In particular, we'd like to try to use a smarter allocation of ports for bpf_bind and explore the possibility of limiting the number of SOCK_DGRAM sockets the process can have. Implement BPF_CGROUP_INET_SOCK_RELEASE hook that triggers on inet socket release. It triggers only for userspace sockets (not in-kernel ones) and therefore has the same semantics as the existing BPF_CGROUP_INET_SOCK_CREATE. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200706230128.4073544-2-sdf@google.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da9bf35a26f8..548a749aebb3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -226,6 +226,7 @@ enum bpf_attach_type { BPF_CGROUP_INET4_GETSOCKNAME, BPF_CGROUP_INET6_GETSOCKNAME, BPF_XDP_DEVMAP, + BPF_CGROUP_INET_SOCK_RELEASE, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From 82394db7383d33641f3f565bd79792fb41b1741f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 29 Jun 2020 12:06:37 -0700 Subject: block: add capacity field to zone descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the zoned storage model, the sectors within a zone are typically all writeable. With the introduction of the Zoned Namespace (ZNS) Command Set in the NVM Express organization, the model was extended to have a specific writeable capacity. Extend the zone descriptor data structure with a zone capacity field to indicate to the user how many sectors in a zone are writeable. Introduce backward compatibility in the zone report ioctl by extending the zone report header data structure with a flags field to indicate if the capacity field is available. Reviewed-by: Jens Axboe Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Daniel Wagner Signed-off-by: Matias Bjørling Signed-off-by: Christoph Hellwig --- include/uapi/linux/blkzoned.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 0cdef67135f0..42c3366cc25f 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -73,6 +73,15 @@ enum blk_zone_cond { BLK_ZONE_COND_OFFLINE = 0xF, }; +/** + * enum blk_zone_report_flags - Feature flags of reported zone descriptors. + * + * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field. + */ +enum blk_zone_report_flags { + BLK_ZONE_REP_CAPACITY = (1 << 0), +}; + /** * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl. * @@ -99,7 +108,9 @@ struct blk_zone { __u8 cond; /* Zone condition */ __u8 non_seq; /* Non-sequential write resources active */ __u8 reset; /* Reset write pointer recommended */ - __u8 reserved[36]; + __u8 resv[4]; + __u64 capacity; /* Zone capacity in number of sectors */ + __u8 reserved[24]; }; /** @@ -115,7 +126,7 @@ struct blk_zone { struct blk_zone_report { __u64 sector; __u32 nr_zones; - __u8 reserved[4]; + __u32 flags; struct blk_zone zones[0]; }; -- cgit v1.2.3 From 1aa561b1a4c0ae2a9a9b9c21a84b5ca66b4775d8 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 3 Jun 2020 16:56:21 -0700 Subject: kvm: x86: Add "last CPU" to some KVM_EXIT information More often than not, a failed VM-entry in an x86 production environment is induced by a defective CPU. To help identify the bad hardware, include the id of the last logical CPU to run a vCPU in the information provided to userspace on a KVM exit for failed VM-entry or for KVM internal errors not associated with emulation. The presence of this additional information is indicated by a new capability, KVM_CAP_LAST_CPU. Signed-off-by: Jim Mattson Reviewed-by: Oliver Upton Reviewed-by: Peter Shier Message-Id: <20200603235623.245638-5-jmattson@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4fdf30316582..ff9b335620d0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -289,6 +289,7 @@ struct kvm_run { /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { @@ -1031,6 +1032,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_LAST_CPU 184 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 065e0d42a0a728d7f6c2aec7c9f3e5dc7b715394 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 6 Jul 2020 20:42:32 -0700 Subject: ethtool: Add support for 100Gbps per lane link modes Define 100G, 200G and 400G link modes using 100Gbps per lane LR, ER and FR are defined as a single link mode because they are using same technology and by design are fully interoperable. EEPROM content indicates if the module is LR, ER, or FR, and the user space ethtool decoder is planned to support decoding these modes in the EEPROM. Signed-off-by: Meir Lichtinger CC: Andrew Lunn Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d1413538ef30..60856e0f9618 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1600,6 +1600,21 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT = 75, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT = 76, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT = 77, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT = 78, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT = 79, + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT = 80, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT = 81, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT = 82, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT = 83, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT = 84, + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT = 85, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT = 86, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; -- cgit v1.2.3 From 6d5f904904608a9cd32854d7d0a4dd65b27f9935 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Thu, 9 Jul 2020 09:15:29 +0800 Subject: io_uring: export cq overflow status to userspace For those applications which are not willing to use io_uring_enter() to reap and handle cqes, they may completely rely on liburing's io_uring_peek_cqe(), but if cq ring has overflowed, currently because io_uring_peek_cqe() is not aware of this overflow, it won't enter kernel to flush cqes, below test program can reveal this bug: static void test_cq_overflow(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int issued = 0; int ret = 0; do { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); break;; } ret = io_uring_submit(ring); if (ret <= 0) { if (ret != -EBUSY) fprintf(stderr, "sqe submit failed: %d\n", ret); break; } issued++; } while (ret > 0); assert(ret == -EBUSY); printf("issued requests: %d\n", issued); while (issued) { ret = io_uring_peek_cqe(ring, &cqe); if (ret) { if (ret != -EAGAIN) { fprintf(stderr, "peek completion failed: %s\n", strerror(ret)); break; } printf("left requets: %d\n", issued); continue; } io_uring_cqe_seen(ring, cqe); issued--; printf("left requets: %d\n", issued); } } int main(int argc, char *argv[]) { int ret; struct io_uring ring; ret = io_uring_queue_init(16, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } test_cq_overflow(&ring); return 0; } To fix this issue, export cq overflow status to userspace by adding new IORING_SQ_CQ_OVERFLOW flag, then helper functions() in liburing, such as io_uring_peek_cqe, can be aware of this cq overflow and do flush accordingly. Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92c22699a5a7..7843742b8b74 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -197,6 +197,7 @@ struct io_sqring_offsets { * sq_ring->flags */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ struct io_cqring_offsets { __u32 head; -- cgit v1.2.3 From 3f935c75eb52dd968351dba824adf466fb9c9429 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:39 +0200 Subject: inet_diag: support for wider protocol numbers After commit bf9765145b85 ("sock: Make sk_protocol a 16-bit value") the current size of 'sdiag_protocol' is not sufficient to represent the possible protocol values. This change introduces a new inet diag request attribute to let user space specify the relevant protocol number using u32 values. The attribute is parsed by inet diag core on get/dump command and the extended protocol value, if available, is preferred to 'sdiag_protocol' to lookup the diag handler. The parse attributed are exposed to all the diag handlers via the cb->data. Note that inet_diag_dump_one_icsk() is left unmodified, as it will not be used by protocol using the extended attribute. Suggested-by: David S. Miller Co-developed-by: Christoph Paasch Signed-off-by: Christoph Paasch Acked-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index e6f183ee8417..5ba122c1949a 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -65,6 +65,7 @@ enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, INET_DIAG_REQ_SK_BPF_STORAGES, + INET_DIAG_REQ_PROTOCOL, __INET_DIAG_REQ_MAX, }; -- cgit v1.2.3 From ac3b45f6095452a9731f8825be1513d326dbfa15 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:41 +0200 Subject: mptcp: add MPTCP socket diag interface exposes basic inet socket attribute, plus some MPTCP socket fields comprising PM status and MPTCP-level sequence numbers. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 5f2c77082d9e..9762660df741 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -86,4 +86,21 @@ enum { __MPTCP_PM_CMD_AFTER_LAST }; +#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) +#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; +}; + #endif /* _UAPI_MPTCP_H */ -- cgit v1.2.3 From a21cf0a8330bba60e44ca6c99e1591042f336ff5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:18 +0300 Subject: devlink: Add a new devlink port lanes attribute and pass to netlink Add a new devlink port attribute that indicates the port's number of lanes. Drivers are expected to set it via devlink_port_attrs_set(), before registering the port. The attribute is not passed to user space in case the number of lanes is invalid (0). Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 87c83a82991b..f741ab8d9cf0 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -455,6 +455,8 @@ enum devlink_attr { DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ + DEVLINK_ATTR_PORT_LANES, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a0f49b54865273c895be3826d6d59cbc5ad725c2 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:20 +0300 Subject: devlink: Add a new devlink port split ability attribute and pass to netlink Add a new attribute that indicates the split ability of devlink port. Drivers are expected to set it via devlink_port_attrs_set(), before registering the port. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f741ab8d9cf0..cfef4245ea5a 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -456,6 +456,7 @@ enum devlink_attr { DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ DEVLINK_ATTR_PORT_LANES, /* u32 */ + DEVLINK_ATTR_PORT_SPLITTABLE, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From f794db6841e5480208f0c3a3ac1df445a96b079e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 9 Jul 2020 14:08:51 +0200 Subject: virt: vbox: Fix VBGL_IOCTL_VMMDEV_REQUEST_BIG and _LOG req numbers to match upstream Until this commit the mainline kernel version (this version) of the vboxguest module contained a bug where it defined VBGL_IOCTL_VMMDEV_REQUEST_BIG and VBGL_IOCTL_LOG using _IOC(_IOC_READ | _IOC_WRITE, 'V', ...) instead of _IO(V, ...) as the out of tree VirtualBox upstream version does. Since the VirtualBox userspace bits are always built against VirtualBox upstream's headers, this means that so far the mainline kernel version of the vboxguest module has been failing these 2 ioctls with -ENOTTY. I guess that VBGL_IOCTL_VMMDEV_REQUEST_BIG is never used causing us to not hit that one and sofar the vboxguest driver has failed to actually log any log messages passed it through VBGL_IOCTL_LOG. This commit changes the VBGL_IOCTL_VMMDEV_REQUEST_BIG and VBGL_IOCTL_LOG defines to match the out of tree VirtualBox upstream vboxguest version, while keeping compatibility with the old wrong request defines so as to not break the kernel ABI in case someone has been using the old request defines. Fixes: f6ddd094f579 ("virt: Add vboxguest driver for Virtual Box Guest integration UAPI") Cc: stable@vger.kernel.org Acked-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200709120858.63928-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vboxguest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vboxguest.h b/include/uapi/linux/vboxguest.h index 9cec58a6a5ea..f79d7abe27db 100644 --- a/include/uapi/linux/vboxguest.h +++ b/include/uapi/linux/vboxguest.h @@ -103,7 +103,7 @@ VMMDEV_ASSERT_SIZE(vbg_ioctl_driver_version_info, 24 + 20); /* IOCTL to perform a VMM Device request larger then 1KB. */ -#define VBG_IOCTL_VMMDEV_REQUEST_BIG _IOC(_IOC_READ | _IOC_WRITE, 'V', 3, 0) +#define VBG_IOCTL_VMMDEV_REQUEST_BIG _IO('V', 3) /** VBG_IOCTL_HGCM_CONNECT data structure. */ @@ -198,7 +198,7 @@ struct vbg_ioctl_log { } u; }; -#define VBG_IOCTL_LOG(s) _IOC(_IOC_READ | _IOC_WRITE, 'V', 9, s) +#define VBG_IOCTL_LOG(s) _IO('V', 9) /** VBG_IOCTL_WAIT_FOR_EVENTS data structure. */ -- cgit v1.2.3 From 631beddc5466731b048263a4a9d3d67150e72f8d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 9 Jul 2020 14:08:55 +0200 Subject: virt: vbox: Add support for the new VBG_IOCTL_ACQUIRE_GUEST_CAPABILITIES ioctl Add support for the new VBG_IOCTL_ACQUIRE_GUEST_CAPABILITIES ioctl, this is necessary for automatic resizing of the guest resolution to match the VM-window size to work with the new VMSVGA virtual GPU which is now the new default in VirtualBox. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1789545 Acked-by: Arnd Bergmann Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200709120858.63928-6-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vboxguest.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vboxguest.h b/include/uapi/linux/vboxguest.h index f79d7abe27db..15125f6ec60d 100644 --- a/include/uapi/linux/vboxguest.h +++ b/include/uapi/linux/vboxguest.h @@ -257,6 +257,30 @@ VMMDEV_ASSERT_SIZE(vbg_ioctl_change_filter, 24 + 8); _IOWR('V', 12, struct vbg_ioctl_change_filter) +/** VBG_IOCTL_ACQUIRE_GUEST_CAPABILITIES data structure. */ +struct vbg_ioctl_acquire_guest_caps { + /** The header. */ + struct vbg_ioctl_hdr hdr; + union { + struct { + /** Flags (VBGL_IOC_AGC_FLAGS_XXX). */ + __u32 flags; + /** Capabilities to set (VMMDEV_GUEST_SUPPORTS_XXX). */ + __u32 or_mask; + /** Capabilities to drop (VMMDEV_GUEST_SUPPORTS_XXX). */ + __u32 not_mask; + } in; + } u; +}; +VMMDEV_ASSERT_SIZE(vbg_ioctl_acquire_guest_caps, 24 + 12); + +#define VBGL_IOC_AGC_FLAGS_CONFIG_ACQUIRE_MODE 0x00000001 +#define VBGL_IOC_AGC_FLAGS_VALID_MASK 0x00000001 + +#define VBG_IOCTL_ACQUIRE_GUEST_CAPABILITIES \ + _IOWR('V', 13, struct vbg_ioctl_acquire_guest_caps) + + /** VBG_IOCTL_CHANGE_GUEST_CAPABILITIES data structure. */ struct vbg_ioctl_set_guest_caps { /** The header. */ -- cgit v1.2.3 From 316b0035402f05fe9e9e5334d1ff65dae285cb7c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 9 Jul 2020 14:08:56 +0200 Subject: virt: vbox: Add a few new vmmdev request types to the userspace whitelist Upstream VirtualBox has defined and is using a few new request types for vmmdev requests passed through /dev/vboxguest to the hypervisor. Add the defines for these to vbox_vmmdev_types.h and add add them to the whitelists of vmmdev requests which userspace is allowed to make. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1789545 Acked-by: Arnd Bergmann Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200709120858.63928-7-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vbox_vmmdev_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index c27289fd619a..f8a8d6b3c521 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -63,6 +63,7 @@ enum vmmdev_request_type { VMMDEVREQ_SET_GUEST_CAPABILITIES = 56, VMMDEVREQ_VIDEMODE_SUPPORTED2 = 57, /* since version 3.2.0 */ VMMDEVREQ_GET_DISPLAY_CHANGE_REQEX = 80, /* since version 4.2.4 */ + VMMDEVREQ_GET_DISPLAY_CHANGE_REQ_MULTI = 81, VMMDEVREQ_HGCM_CONNECT = 60, VMMDEVREQ_HGCM_DISCONNECT = 61, VMMDEVREQ_HGCM_CALL32 = 62, @@ -92,6 +93,8 @@ enum vmmdev_request_type { VMMDEVREQ_WRITE_COREDUMP = 218, VMMDEVREQ_GUEST_HEARTBEAT = 219, VMMDEVREQ_HEARTBEAT_CONFIGURE = 220, + VMMDEVREQ_NT_BUG_CHECK = 221, + VMMDEVREQ_VIDEO_UPDATE_MONITOR_POSITIONS = 222, /* Ensure the enum is a 32 bit data-type */ VMMDEVREQ_SIZEHACK = 0x7fffffff }; -- cgit v1.2.3 From 04aaca197f16aff608e19ee98b1e55f535d746be Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 17 Jun 2020 17:33:13 +0900 Subject: char: raw: do not leak CONFIG_MAX_RAW_DEVS to userspace include/uapi/linux/raw.h leaks CONFIG_MAX_RAW_DEVS to userspace. Userspace programs cannot use MAX_RAW_MINORS since CONFIG_MAX_RAW_DEVS is not available anyway. Remove the MAX_RAW_MINORS definition from the exported header, and use CONFIG_MAX_RAW_DEVS in drivers/char/raw.c While I was here, I converted printk(KERN_WARNING ...) to pr_warn(...) and stretched the warning message. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20200617083313.183184-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/raw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raw.h b/include/uapi/linux/raw.h index dc96dda479d6..47874919d0b9 100644 --- a/include/uapi/linux/raw.h +++ b/include/uapi/linux/raw.h @@ -14,6 +14,4 @@ struct raw_config_request __u64 block_minor; }; -#define MAX_RAW_MINORS CONFIG_MAX_RAW_DEVS - #endif /* __LINUX_RAW_H */ -- cgit v1.2.3 From c7d759eb7b12f91a25f4d3cd03ff5209046ddfc2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:47 -0700 Subject: ethtool: add tunnel info interface Add an interface to report offloaded UDP ports via ethtool netlink. Now that core takes care of tracking which UDP tunnel ports the NICs are aware of we can quite easily export this information out to user space. The responsibility of writing the netlink dumps is split between ethtool code and udp_tunnel_nic.c - since udp_tunnel module may not always be loaded, yet we should always report the capabilities of the NIC. $ ethtool --show-tunnels eth0 Tunnel information for eth0: UDP port table 0: Size: 4 Types: vxlan No entries UDP port table 1: Size: 4 Types: geneve, vxlan-gpe Entries (1): port 1230, vxlan-gpe v4: - back to v2, build fix is now directly in udp_tunnel.h v3: - don't compile ETHTOOL_MSG_TUNNEL_INFO_GET in if CONFIG_INET not set. v2: - fix string set count, - reorder enums in the uAPI, - fix type of ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES to bitset in docs and comments. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 2 ++ include/uapi/linux/ethtool_netlink.h | 55 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 60856e0f9618..b4f2d134e713 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -669,6 +669,7 @@ enum ethtool_link_ext_substate_cable_issue { * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters + * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -686,6 +687,7 @@ enum ethtool_stringset { ETH_SS_SOF_TIMESTAMPING, ETH_SS_TS_TX_TYPES, ETH_SS_TS_RX_FILTERS, + ETH_SS_UDP_TUNNEL_TYPES, /* add new constants above here */ ETH_SS_COUNT diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c12ce4df4b6b..5dcd24cb33ea 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -41,6 +41,7 @@ enum { ETHTOOL_MSG_TSINFO_GET, ETHTOOL_MSG_CABLE_TEST_ACT, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + ETHTOOL_MSG_TUNNEL_INFO_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -556,6 +557,60 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 }; +/* TUNNEL INFO */ + +enum { + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, + ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, + + __ETHTOOL_UDP_TUNNEL_TYPE_CNT +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, + ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_CNT, + ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_INFO_UNSPEC, + ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_INFO_CNT, + ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 3edd68399dc155b80335244c8c2673eaa652931a Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Fri, 10 Jul 2020 17:48:11 +0200 Subject: KVM: x86: Add a capability for GUEST_MAXPHYADDR < HOST_MAXPHYADDR support This patch adds a new capability KVM_CAP_SMALLER_MAXPHYADDR which allows userspace to query if the underlying architecture would support GUEST_MAXPHYADDR < HOST_MAXPHYADDR and hence act accordingly (e.g. qemu can decide if it should warn for -cpu ..,phys-bits=X) The complications in this patch are due to unexpected (but documented) behaviour we see with NPF vmexit handling in AMD processor. If SVM is modified to add guest physical address checks in the NPF and guest #PF paths, we see the followning error multiple times in the 'access' test in kvm-unit-tests: test pte.p pte.36 pde.p: FAIL: pte 2000021 expected 2000001 Dump mapping: address: 0x123400000000 ------L4: 24c3027 ------L3: 24c4027 ------L2: 24c5021 ------L1: 1002000021 This is because the PTE's accessed bit is set by the CPU hardware before the NPF vmexit. This is handled completely by hardware and cannot be fixed in software. Therefore, availability of the new capability depends on a boolean variable allow_smaller_maxphyaddr which is set individually by VMX and SVM init routines. On VMX it's always set to true, on SVM it's only set to true when NPT is not enabled. CC: Tom Lendacky CC: Babu Moger Signed-off-by: Mohammed Gamal Message-Id: <20200710154811.418214-10-mgamal@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ff9b335620d0..2c73dcfb3dbb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1033,6 +1033,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 #define KVM_CAP_LAST_CPU 184 +#define KVM_CAP_SMALLER_MAXPHYADDR 185 + #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 47e33c05f9f07cac3de833e531bcac9ae052c7ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 15 Jun 2020 15:42:46 -0700 Subject: seccomp: Fix ioctl number for SECCOMP_IOCTL_NOTIF_ID_VALID When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced it had the wrong direction flag set. While this isn't a big deal as nothing currently enforces these bits in the kernel, it should be defined correctly. Fix the define and provide support for the old command until it is no longer needed for backward compatibility. Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index c1735455bc53..965290f7dcc2 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -123,5 +123,6 @@ struct seccomp_notif_resp { #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) -#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) + #endif /* _UAPI_LINUX_SECCOMP_H */ -- cgit v1.2.3 From 21249616f02dc3f4c5efc3b3e9ba48e428b0131c Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Wed, 8 Jul 2020 12:15:57 +0800 Subject: gpio: uapi: fix misplaced comment line The second line of the description for event_type is before the first. Move it to after the first line. Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 0206383c0383..9c27cecf406f 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -71,8 +71,8 @@ enum { * of a GPIO line * @info: updated line information * @timestamp: estimate of time of status change occurrence, in nanoseconds - * and GPIOLINE_CHANGED_CONFIG * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED + * and GPIOLINE_CHANGED_CONFIG * * Note: struct gpioline_info embedded here has 32-bit alignment on its own, * but it works fine with 64-bit alignment too. With its 72 byte size, we can -- cgit v1.2.3 From c1326210477ecc06c53221f0005c64419aba30d6 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:20 +0000 Subject: nfs,nfsd: NFSv4.2 extended attribute protocol definitions Add definitions for the new operations, errors and flags as defined in RFC 8276 (File System Extended Attributes in NFSv4). Signed-off-by: Frank van der Linden Signed-off-by: Chuck Lever --- include/uapi/linux/nfs4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b0..bf197e99b98f 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -33,6 +33,9 @@ #define NFS4_ACCESS_EXTEND 0x0008 #define NFS4_ACCESS_DELETE 0x0010 #define NFS4_ACCESS_EXECUTE 0x0020 +#define NFS4_ACCESS_XAREAD 0x0040 +#define NFS4_ACCESS_XAWRITE 0x0080 +#define NFS4_ACCESS_XALIST 0x0100 #define NFS4_FH_PERSISTENT 0x0000 #define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001 -- cgit v1.2.3 From 95ad37f90c338e3fd4abf61cecfe02b6f3e080f0 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:04 +0000 Subject: NFSv4.2: add client side xattr caching. Implement client side caching for NFSv4.2 extended attributes. The cache is a per-inode hashtable, with name/value entries. There is one special entry for the listxattr cache. NFS inodes have a pointer to a cache structure. The cache structure is allocated on demand, freed when the cache is invalidated. Memory shrinkers keep the size in check. Large entries (> PAGE_SIZE) are collected by a separate shrinker, and freed more aggressively than others. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h index 7bcc8cd6831d..3afe3767c55d 100644 --- a/include/uapi/linux/nfs_fs.h +++ b/include/uapi/linux/nfs_fs.h @@ -56,6 +56,7 @@ #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_STATE 0x4000 +#define NFSDBG_XATTRCACHE 0x8000 #define NFSDBG_ALL 0xFFFF -- cgit v1.2.3 From 8aa5a33578e9685d06020bd10d1637557423e945 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:33 +0000 Subject: xsk: Add new statistics It can be useful for the user to know the reason behind a dropped packet. Introduce new counters which track drops on the receive path caused by: 1. rx ring being full 2. fill ring being empty Also, on the tx path introduce a counter which tracks the number of times we attempt pull from the tx ring when it is empty. Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-2-ciara.loftus@intel.com --- include/uapi/linux/if_xdp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index be328c59389d..a78a8096f4ce 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -73,9 +73,12 @@ struct xdp_umem_reg { }; struct xdp_statistics { - __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ + __u64 rx_dropped; /* Dropped for other reasons */ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 rx_ring_full; /* Dropped due to rx ring being full */ + __u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */ + __u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */ }; struct xdp_options { -- cgit v1.2.3 From 0d80cb4612aa32dc0faa17fa3ab6f96f33e2b4a7 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:35 +0000 Subject: xsk: Add xdp statistics to xsk_diag Add xdp statistics to the information dumped through the xsk_diag interface Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-4-ciara.loftus@intel.com --- include/uapi/linux/xdp_diag.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h index 78b2591a7782..66b9973b4f4c 100644 --- a/include/uapi/linux/xdp_diag.h +++ b/include/uapi/linux/xdp_diag.h @@ -30,6 +30,7 @@ struct xdp_diag_msg { #define XDP_SHOW_RING_CFG (1 << 1) #define XDP_SHOW_UMEM (1 << 2) #define XDP_SHOW_MEMINFO (1 << 3) +#define XDP_SHOW_STATS (1 << 4) enum { XDP_DIAG_NONE, @@ -41,6 +42,7 @@ enum { XDP_DIAG_UMEM_FILL_RING, XDP_DIAG_UMEM_COMPLETION_RING, XDP_DIAG_MEMINFO, + XDP_DIAG_STATS, __XDP_DIAG_MAX, }; @@ -69,4 +71,13 @@ struct xdp_diag_umem { __u32 refs; }; +struct xdp_diag_stats { + __u64 n_rx_dropped; + __u64 n_rx_invalid; + __u64 n_rx_full; + __u64 n_fill_ring_empty; + __u64 n_tx_invalid; + __u64 n_tx_ring_empty; +}; + #endif /* _LINUX_XDP_DIAG_H */ -- cgit v1.2.3 From ed757328c34015be4ec51861a90bb3bcc807ad58 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 12:24:18 +0200 Subject: atm: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: David S. Miller --- include/uapi/linux/atmioc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/atmioc.h b/include/uapi/linux/atmioc.h index cd7655e40c77..a9030bcc8d56 100644 --- a/include/uapi/linux/atmioc.h +++ b/include/uapi/linux/atmioc.h @@ -5,7 +5,7 @@ /* - * See http://icawww1.epfl.ch/linux-atm/magic.html for the complete list of + * See https://icawww1.epfl.ch/linux-atm/magic.html for the complete list of * "magic" ioctl numbers. */ -- cgit v1.2.3 From 2801758391ba6b0c20e253b956355e1b15ad85a2 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:48 +0200 Subject: bridge: uapi: mrp: Extend MRP attributes for MRP interconnect Extend the existing MRP netlink attributes to allow to configure MRP Interconnect: IFLA_BRIDGE_MRP_IN_ROLE - the parameter type is br_mrp_in_role which contains the interconnect id, the ring id, the interconnect role(MIM or MIC) and the port ifindex that represents the interconnect port. IFLA_BRIDGE_MRP_IN_STATE - the parameter type is br_mrp_in_state which contains the interconnect id and the interconnect state. IFLA_BRIDGE_MRP_IN_TEST - the parameter type is br_mrp_start_in_test which contains the interconnect id, the interval at which to send MRP_InTest frames, how many test frames can be missed before declaring the interconnect ring open and the period which represents for how long to send MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 53 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/mrp_bridge.h | 38 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c114c1c2bd53..d840a3e37a37 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -167,6 +167,9 @@ enum { IFLA_BRIDGE_MRP_RING_ROLE, IFLA_BRIDGE_MRP_START_TEST, IFLA_BRIDGE_MRP_INFO, + IFLA_BRIDGE_MRP_IN_ROLE, + IFLA_BRIDGE_MRP_IN_STATE, + IFLA_BRIDGE_MRP_START_IN_TEST, __IFLA_BRIDGE_MRP_MAX, }; @@ -245,6 +248,37 @@ enum { #define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1) +enum { + IFLA_BRIDGE_MRP_IN_STATE_UNSPEC, + IFLA_BRIDGE_MRP_IN_STATE_IN_ID, + IFLA_BRIDGE_MRP_IN_STATE_STATE, + __IFLA_BRIDGE_MRP_IN_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_STATE_MAX (__IFLA_BRIDGE_MRP_IN_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_IN_ROLE_RING_ID, + IFLA_BRIDGE_MRP_IN_ROLE_IN_ID, + IFLA_BRIDGE_MRP_IN_ROLE_ROLE, + IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX, + __IFLA_BRIDGE_MRP_IN_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_ROLE_MAX (__IFLA_BRIDGE_MRP_IN_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC, + IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID, + IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD, + __IFLA_BRIDGE_MRP_START_IN_TEST_MAX, +}; + +#define IFLA_BRIDGE_MRP_START_IN_TEST_MAX (__IFLA_BRIDGE_MRP_START_IN_TEST_MAX - 1) + struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; @@ -270,6 +304,25 @@ struct br_mrp_start_test { __u32 monitor; }; +struct br_mrp_in_state { + __u32 in_state; + __u16 in_id; +}; + +struct br_mrp_in_role { + __u32 ring_id; + __u32 in_role; + __u32 i_ifindex; + __u16 in_id; +}; + +struct br_mrp_start_in_test { + __u32 interval; + __u32 max_miss; + __u32 period; + __u16 in_id; +}; + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index bee366540212..6aeb13ef0b1e 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -21,11 +21,22 @@ enum br_mrp_ring_role_type { BR_MRP_RING_ROLE_MRA, }; +enum br_mrp_in_role_type { + BR_MRP_IN_ROLE_DISABLED, + BR_MRP_IN_ROLE_MIC, + BR_MRP_IN_ROLE_MIM, +}; + enum br_mrp_ring_state_type { BR_MRP_RING_STATE_OPEN, BR_MRP_RING_STATE_CLOSED, }; +enum br_mrp_in_state_type { + BR_MRP_IN_STATE_OPEN, + BR_MRP_IN_STATE_CLOSED, +}; + enum br_mrp_port_state_type { BR_MRP_PORT_STATE_DISABLED, BR_MRP_PORT_STATE_BLOCKED, @@ -36,6 +47,7 @@ enum br_mrp_port_state_type { enum br_mrp_port_role_type { BR_MRP_PORT_ROLE_PRIMARY, BR_MRP_PORT_ROLE_SECONDARY, + BR_MRP_PORT_ROLE_INTER, }; enum br_mrp_tlv_header_type { @@ -45,6 +57,10 @@ enum br_mrp_tlv_header_type { BR_MRP_TLV_HEADER_RING_TOPO = 0x3, BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4, BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5, + BR_MRP_TLV_HEADER_IN_TEST = 0x6, + BR_MRP_TLV_HEADER_IN_TOPO = 0x7, + BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8, + BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9, BR_MRP_TLV_HEADER_OPTION = 0x7f, }; @@ -118,4 +134,26 @@ struct br_mrp_oui_hdr { __u8 oui[MRP_OUI_LENGTH]; }; +struct br_mrp_in_test_hdr { + __be16 id; + __u8 sa[ETH_ALEN]; + __be16 port_role; + __be16 state; + __be16 transitions; + __be32 timestamp; +}; + +struct br_mrp_in_topo_hdr { + __u8 sa[ETH_ALEN]; + __be16 id; + __be16 interval; +}; + +struct br_mrp_in_link_hdr { + __u8 sa[ETH_ALEN]; + __be16 port_role; + __be16 id; + __be16 interval; +}; + #endif -- cgit v1.2.3 From 559139cb0405d38816e5e725adee9000db993235 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:56 +0200 Subject: bridge: uapi: mrp: Extend MRP_INFO attributes for interconnect status Extend the existing MRP_INFO to return status of MRP interconnect. In case there is no MRP interconnect on the node then the role will be disabled so the other attributes can be ignored. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index d840a3e37a37..c1227aecd38f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -243,6 +243,11 @@ enum { IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + IFLA_BRIDGE_MRP_INFO_I_IFINDEX, + IFLA_BRIDGE_MRP_INFO_IN_STATE, + IFLA_BRIDGE_MRP_INFO_IN_ROLE, + IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, __IFLA_BRIDGE_MRP_INFO_MAX, }; -- cgit v1.2.3 From ffb3adba64801f70c472303c9e386eb5eaec193d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:58 +0200 Subject: net: bridge: Add port attribute IFLA_BRPORT_MRP_IN_OPEN This patch adds a new port attribute, IFLA_BRPORT_MRP_IN_OPEN, which allows to notify the userspace when the node lost the contiuity of MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc185a007ade..26842ffd0501 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -344,6 +344,7 @@ enum { IFLA_BRPORT_ISOLATED, IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, + IFLA_BRPORT_MRP_IN_OPEN, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From 7cf97b12545503992020796c74bd84078eb39299 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Tue, 2 Jun 2020 18:10:43 -0700 Subject: seccomp: Introduce addfd ioctl to seccomp user notifier The current SECCOMP_RET_USER_NOTIF API allows for syscall supervision over an fd. It is often used in settings where a supervising task emulates syscalls on behalf of a supervised task in userspace, either to further restrict the supervisee's syscall abilities or to circumvent kernel enforced restrictions the supervisor deems safe to lift (e.g. actually performing a mount(2) for an unprivileged container). While SECCOMP_RET_USER_NOTIF allows for the interception of any syscall, only a certain subset of syscalls could be correctly emulated. Over the last few development cycles, the set of syscalls which can't be emulated has been reduced due to the addition of pidfd_getfd(2). With this we are now able to, for example, intercept syscalls that require the supervisor to operate on file descriptors of the supervisee such as connect(2). However, syscalls that cause new file descriptors to be installed can not currently be correctly emulated since there is no way for the supervisor to inject file descriptors into the supervisee. This patch adds a new addfd ioctl to remove this restriction by allowing the supervisor to install file descriptors into the intercepted task. By implementing this feature via seccomp the supervisor effectively instructs the supervisee to install a set of file descriptors into its own file descriptor table during the intercepted syscall. This way it is possible to intercept syscalls such as open() or accept(), and install (or replace, like dup2(2)) the supervisor's resulting fd into the supervisee. One replacement use-case would be to redirect the stdout and stderr of a supervisee into log file descriptors opened by the supervisor. The ioctl handling is based on the discussions[1] of how Extensible Arguments should interact with ioctls. Instead of building size into the addfd structure, make it a function of the ioctl command (which is how sizes are normally passed to ioctls). To support forward and backward compatibility, just mask out the direction and size, and match everything. The size (and any future direction) checks are done along with copy_struct_from_user() logic. As a note, the seccomp_notif_addfd structure is laid out based on 8-byte alignment without requiring packing as there have been packing issues with uapi highlighted before[2][3]. Although we could overload the newfd field and use -1 to indicate that it is not to be used, doing so requires changing the size of the fd field, and introduces struct packing complexity. [1]: https://lore.kernel.org/lkml/87o8w9bcaf.fsf@mid.deneb.enyo.de/ [2]: https://lore.kernel.org/lkml/a328b91d-fd8f-4f27-b3c2-91a9c45f18c0@rasmusvillemoes.dk/ [3]: https://lore.kernel.org/lkml/20200612104629.GA15814@ircssh-2.c.rugged-nimbus-611.internal Cc: Christoph Hellwig Cc: Christian Brauner Cc: Tycho Andersen Cc: Jann Horn Cc: Robert Sesek Cc: Chris Palmer Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org Suggested-by: Matt Denton Link: https://lore.kernel.org/r/20200603011044.7972-4-sargun@sargun.me Signed-off-by: Sargun Dhillon Reviewed-by: Will Drewry Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 965290f7dcc2..6ba18b82a02e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -113,6 +113,25 @@ struct seccomp_notif_resp { __u32 flags; }; +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ + +/** + * struct seccomp_notif_addfd + * @id: The ID of the seccomp notification + * @flags: SECCOMP_ADDFD_FLAG_* + * @srcfd: The local fd number + * @newfd: Optional remote FD number if SETFD option is set, otherwise 0. + * @newfd_flags: The O_* flags the remote FD should have applied + */ +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; + #define SECCOMP_IOC_MAGIC '!' #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) @@ -124,5 +143,8 @@ struct seccomp_notif_resp { #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) #endif /* _UAPI_LINUX_SECCOMP_H */ -- cgit v1.2.3 From 938a0650aae6275ba8e924685836bdee2c6aa3db Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Wed, 13 May 2020 08:19:29 -0400 Subject: drm/amdkfd: Provide SMI events watch When the compute is malfunctioning or performance drops, the system admin will use SMI (System Management Interface) tool to monitor/diagnostic what went wrong. This patch provides an event watch interface for the user space to register devices and subscribe events they are interested. After registered, the user can use annoymous file descriptor's poll function with wait-time specified and wait for events to happen. Once an event happens, the user can use read() to retrieve information related to the event. VM fault event is done in this patch. v2: - remove UNREGISTER and add event ENABLE/DISABLE - correct kfifo usage - move event message API to kfd_ioctl.h v3: send the event msg in text than in binary v4: support multiple clients v5: move events enablement from ioctl to fd write v6: sparse fix Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b6be62356d34..733c183d165e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -442,6 +442,17 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* + * KFD SMI(System Management Interface) events + */ +/* Event type (defined by bitmask) */ +#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 + +struct kfd_ioctl_smi_events_args { + __u32 gpuid; /* to KFD */ + __u32 anon_fd; /* from KFD */ +}; + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -546,7 +557,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_ALLOC_QUEUE_GWS \ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) +#define AMDKFD_IOC_SMI_EVENTS \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1F +#define AMDKFD_COMMAND_END 0x20 #endif -- cgit v1.2.3 From 91e2c1919230c719c32a3076657f51d4c6074513 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Mon, 20 Apr 2020 19:42:46 -0400 Subject: include/uapi/linux: Update KFD ioctl version Bump KFD ioctl after adding SMI events support Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 733c183d165e..f738c3b53f4e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -26,8 +26,12 @@ #include #include +/* + * - 1.1 - initial version + * - 1.3 - Add SMI events support + */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 1 +#define KFD_IOCTL_MINOR_VERSION 3 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From c201324b54553aebb32845193680f21eb493c6e5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:42 -0700 Subject: net: caif: drop duplicate words in comments Drop doubled words "or" and "the" in several comments. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/caif/caif_socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h index 10ec1d1cf68e..d9970bbaa156 100644 --- a/include/uapi/linux/caif/caif_socket.h +++ b/include/uapi/linux/caif/caif_socket.h @@ -169,7 +169,7 @@ struct sockaddr_caif { * @CAIFSO_LINK_SELECT: Selector used if multiple CAIF Link layers are * available. Either a high bandwidth * link can be selected (CAIF_LINK_HIGH_BANDW) or - * or a low latency link (CAIF_LINK_LOW_LATENCY). + * a low latency link (CAIF_LINK_LOW_LATENCY). * This option is of type __u32. * Alternatively SO_BINDTODEVICE can be used. * -- cgit v1.2.3 From 644bfe51fa49c22244d24e896cd3fe3ee2f2cfd1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:37 +0200 Subject: cpumap: Formalize map value as a named struct As it has been already done for devmap, introduce 'struct bpf_cpumap_val' to formalize the expected values that can be passed in for a CPUMAP. Update cpumap code to use the struct. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/754f950674665dae6139c061d28c1d982aaf4170.1594734381.git.lorenzo@kernel.org --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5e386389913a..109623527358 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3849,6 +3849,15 @@ struct bpf_devmap_val { } bpf_prog; }; +/* CPUMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_cpumap_val { + __u32 qsize; /* queue size to remote target CPU */ +}; + enum sk_action { SK_DROP = 0, SK_PASS, -- cgit v1.2.3 From 9216477449f33cdbc9c9a99d49f500b7fbb81702 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:38 +0200 Subject: bpf: cpumap: Add the possibility to attach an eBPF program to cpumap Introduce the capability to attach an eBPF program to cpumap entries. The idea behind this feature is to add the possibility to define on which CPU run the eBPF program if the underlying hw does not support RSS. Current supported verdicts are XDP_DROP and XDP_PASS. This patch has been tested on Marvell ESPRESSObin using xdp_redirect_cpu sample available in the kernel tree to identify possible performance regressions. Results show there are no observable differences in packet-per-second: $./xdp_redirect_cpu --progname xdp_cpu_map0 --dev eth0 --cpu 1 rx: 354.8 Kpps rx: 356.0 Kpps rx: 356.8 Kpps rx: 356.3 Kpps rx: 356.6 Kpps rx: 356.6 Kpps rx: 356.7 Kpps rx: 355.8 Kpps rx: 356.8 Kpps rx: 356.8 Kpps Co-developed-by: Jesper Dangaard Brouer Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/5c9febdf903d810b3415732e5cd98491d7d9067a.1594734381.git.lorenzo@kernel.org --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 109623527358..c010b57fce3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -227,6 +227,7 @@ enum bpf_attach_type { BPF_CGROUP_INET6_GETSOCKNAME, BPF_XDP_DEVMAP, BPF_CGROUP_INET_SOCK_RELEASE, + BPF_XDP_CPUMAP, __MAX_BPF_ATTACH_TYPE }; @@ -3856,6 +3857,10 @@ struct bpf_devmap_val { */ struct bpf_cpumap_val { __u32 qsize; /* queue size to remote target CPU */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; }; enum sk_action { -- cgit v1.2.3 From bfdfa51702dec67e9fcd52568b4cf3c7f799db8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 18:29:11 -0700 Subject: bpf: Drop duplicated words in uapi helper comments Drop doubled words "will" and "attach". Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/6b9f71ae-4f8e-0259-2c5d-187ddaefe6eb@infradead.org --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c010b57fce3f..7ac3992dacfe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2420,7 +2420,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -2457,7 +2457,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -4000,7 +4000,7 @@ struct bpf_link_info { /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on - * attach attach type). + * attach type). */ struct bpf_sock_addr { __u32 user_family; /* Allows 4-byte read, but no write. */ -- cgit v1.2.3 From e3a5a1e8b6548f5d37328e2d3571edc5c9e6d7c0 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Thu, 16 Jul 2020 12:12:35 -0700 Subject: tcp: add SNMP counter for no. of duplicate segments reported by DSACK There are two existing SNMP counters, TCPDSACKRecv and TCPDSACKOfoRecv, which are incremented depending on whether the DSACKed range is below the cumulative ACK sequence number or not. Unfortunately, these both implicitly assume each DSACK covers only one segment. This makes these counters unusable for estimating spurious retransmit rates, or real/non-spurious loss rate. This patch introduces a new SNMP counter, TCPDSACKRecvSegs, which tracks the estimated number of duplicate segments based on: (DSACKed sequence range) / MSS. This counter is usable for estimating spurious retransmit rates, or real/non-spurious loss rate. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 7d91f4debc48..cee9f8e6fce3 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -287,6 +287,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */ LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ + LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From e9ddbb7707ff5891616240026062b8c1e29864ca Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:23 +0200 Subject: bpf: Introduce SK_LOOKUP program type with a dedicated attach point Add a new program type BPF_PROG_TYPE_SK_LOOKUP with a dedicated attach type BPF_SK_LOOKUP. The new program kind is to be invoked by the transport layer when looking up a listening socket for a new connection request for connection oriented protocols, or when looking up an unconnected socket for a packet for connection-less protocols. When called, SK_LOOKUP BPF program can select a socket that will receive the packet. This serves as a mechanism to overcome the limits of what bind() API allows to express. Two use-cases driving this work are: (1) steer packets destined to an IP range, on fixed port to a socket 192.0.2.0/24, port 80 -> NGINX socket (2) steer packets destined to an IP address, on any port to a socket 198.51.100.1, any port -> L7 proxy socket In its run-time context program receives information about the packet that triggered the socket lookup. Namely IP version, L4 protocol identifier, and address 4-tuple. Context can be further extended to include ingress interface identifier. To select a socket BPF program fetches it from a map holding socket references, like SOCKMAP or SOCKHASH, and calls bpf_sk_assign(ctx, sk, ...) helper to record the selection. Transport layer then uses the selected socket as a result of socket lookup. In its basic form, SK_LOOKUP acts as a filter and hence must return either SK_PASS or SK_DROP. If the program returns with SK_PASS, transport should look for a socket to receive the packet, or use the one selected by the program if available, while SK_DROP informs the transport layer that the lookup should fail. This patch only enables the user to attach an SK_LOOKUP program to a network namespace. Subsequent patches hook it up to run on local delivery path in ipv4 and ipv6 stacks. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-3-jakub@cloudflare.com --- include/uapi/linux/bpf.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7ac3992dacfe..54d0c886e3ba 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -189,6 +189,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_STRUCT_OPS, BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, + BPF_PROG_TYPE_SK_LOOKUP, }; enum bpf_attach_type { @@ -228,6 +229,7 @@ enum bpf_attach_type { BPF_XDP_DEVMAP, BPF_CGROUP_INET_SOCK_RELEASE, BPF_XDP_CPUMAP, + BPF_SK_LOOKUP, __MAX_BPF_ATTACH_TYPE }; @@ -3069,6 +3071,10 @@ union bpf_attr { * * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and + * **BPF_PROG_TYPE_SCHED_ACT** programs. + * * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, * will cause *skb* to be delivered to the specified socket. @@ -3094,6 +3100,56 @@ union bpf_attr { * **-ESOCKTNOSUPPORT** if the socket type is not supported * (reuseport). * + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) + * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. + * + * Select the *sk* as a result of a socket lookup. + * + * For the operation to succeed passed socket must be compatible + * with the packet description provided by the *ctx* object. + * + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must + * be an exact match. While IP family (**AF_INET** or + * **AF_INET6**) must be compatible, that is IPv6 sockets + * that are not v6-only can be selected for IPv4 packets. + * + * Only TCP listeners and UDP unconnected sockets can be + * selected. *sk* can also be NULL to reset any previous + * selection. + * + * *flags* argument can combination of following values: + * + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous + * socket selection, potentially done by a BPF program + * that ran before us. + * + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip + * load-balancing within reuseport group for the socket + * being selected. + * + * On success *ctx->sk* will point to the selected socket. + * + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is + * not compatible with packet family (*ctx->family*). + * + * * **-EEXIST** if socket has been already selected, + * potentially by another program, and + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. + * + * * **-EINVAL** if unsupported flags were specified. + * + * * **-EPROTOTYPE** if socket L4 protocol + * (*sk->protocol*) doesn't match packet protocol + * (*ctx->protocol*). + * + * * **-ESOCKTNOSUPPORT** if socket is not in allowed + * state (TCP listening or UDP unconnected). + * * u64 bpf_ktime_get_boot_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. @@ -3607,6 +3663,12 @@ enum { BPF_RINGBUF_HDR_SZ = 8, }; +/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ +enum { + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), +}; + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -4349,4 +4411,19 @@ struct bpf_pidns_info { __u32 pid; __u32 tgid; }; + +/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ +struct bpf_sk_lookup { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ + __u32 remote_ip4; /* Network byte order */ + __u32 remote_ip6[4]; /* Network byte order */ + __u32 remote_port; /* Network byte order */ + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From b3ab1c6058fad8cd5726f24e9ed9053e43bb2af4 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 24 Jun 2020 21:28:00 +0200 Subject: media: Add V4L2_TYPE_IS_CAPTURE helper It's all too easy to get confused by the V4L2_TYPE_IS_OUTPUT macro, when it's used as !V4L2_TYPE_IS_OUTPUT. Reduce the risk of confusion with macro to explicitly check for the CAPTURE queue type case. This change does not affect functionality, and it's only intended to make the code more readable. Suggested-by: Nicolas Dufresne Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil [hverkuil-cisco@xs4all.nl: checkpatch: align with parenthesis] Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 303805438814..c7b70ff53bc1 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -171,6 +171,8 @@ enum v4l2_buf_type { || (type) == V4L2_BUF_TYPE_SDR_OUTPUT \ || (type) == V4L2_BUF_TYPE_META_OUTPUT) +#define V4L2_TYPE_IS_CAPTURE(type) (!V4L2_TYPE_IS_OUTPUT(type)) + enum v4l2_tuner_type { V4L2_TUNER_RADIO = 1, V4L2_TUNER_ANALOG_TV = 2, -- cgit v1.2.3 From 124ea650d3072b005457faed69909221c2905a1f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 19 Jul 2020 12:04:11 +0200 Subject: capabilities: Introduce CAP_CHECKPOINT_RESTORE This patch introduces CAP_CHECKPOINT_RESTORE, a new capability facilitating checkpoint/restore for non-root users. Over the last years, The CRIU (Checkpoint/Restore In Userspace) team has been asked numerous times if it is possible to checkpoint/restore a process as non-root. The answer usually was: 'almost'. The main blocker to restore a process as non-root was to control the PID of the restored process. This feature available via the clone3 system call, or via /proc/sys/kernel/ns_last_pid is unfortunately guarded by CAP_SYS_ADMIN. In the past two years, requests for non-root checkpoint/restore have increased due to the following use cases: * Checkpoint/Restore in an HPC environment in combination with a resource manager distributing jobs where users are always running as non-root. There is a desire to provide a way to checkpoint and restore long running jobs. * Container migration as non-root * We have been in contact with JVM developers who are integrating CRIU into a Java VM to decrease the startup time. These checkpoint/restore applications are not meant to be running with CAP_SYS_ADMIN. We have seen the following workarounds: * Use a setuid wrapper around CRIU: See https://github.com/FredHutch/slurm-examples/blob/master/checkpointer/lib/checkpointer/checkpointer-suid.c * Use a setuid helper that writes to ns_last_pid. Unfortunately, this helper delegation technique is impossible to use with clone3, and is thus prone to races. See https://github.com/twosigma/set_ns_last_pid * Cycle through PIDs with fork() until the desired PID is reached: This has been demonstrated to work with cycling rates of 100,000 PIDs/s See https://github.com/twosigma/set_ns_last_pid * Patch out the CAP_SYS_ADMIN check from the kernel * Run the desired application in a new user and PID namespace to provide a local CAP_SYS_ADMIN for controlling PIDs. This technique has limited use in typical container environments (e.g., Kubernetes) as /proc is typically protected with read-only layers (e.g., /proc/sys) for hardening purposes. Read-only layers prevent additional /proc mounts (due to proc's SB_I_USERNS_VISIBLE property), making the use of new PID namespaces limited as certain applications need access to /proc matching their PID namespace. The introduced capability allows to: * Control PIDs when the current user is CAP_CHECKPOINT_RESTORE capable for the corresponding PID namespace via ns_last_pid/clone3. * Open files in /proc/pid/map_files when the current user is CAP_CHECKPOINT_RESTORE capable in the root namespace, useful for recovering files that are unreachable via the file system such as deleted files, or memfd files. See corresponding selftest for an example with clone3(). Signed-off-by: Adrian Reber Signed-off-by: Nicolas Viennot Reviewed-by: Serge Hallyn Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20200719100418.2112740-2-areber@redhat.com Signed-off-by: Christian Brauner --- include/uapi/linux/capability.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 48ff0757ae5e..395dd0df8d08 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -408,7 +408,14 @@ struct vfs_ns_cap_data { */ #define CAP_BPF 39 -#define CAP_LAST_CAP CAP_BPF + +/* Allow checkpoint/restore related operations */ +/* Allow PID selection during clone3() */ +/* Allow writing to ns_last_pid */ + +#define CAP_CHECKPOINT_RESTORE 40 + +#define CAP_LAST_CAP CAP_CHECKPOINT_RESTORE #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) -- cgit v1.2.3 From c4471ad9a50d5548e66ae4511acfb1dc23a48744 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 20 Jul 2020 00:03:33 +0200 Subject: net: phy: add USXGMII link partner ability constants The constants are taken from the USXGMII Singleport Copper Interface specification. The naming are based on the SGMII ones, but with an MDIO_ prefix. Signed-off-by: Michael Walle Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 4bcb41c71b8c..3f302e2523b2 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -324,4 +324,30 @@ static inline __u16 mdio_phy_id_c45(int prtad, int devad) return MDIO_PHY_ID_C45 | (prtad << 5) | devad; } +/* UsxgmiiChannelInfo[15:0] for USXGMII in-band auto-negotiation.*/ +#define MDIO_USXGMII_EEE_CLK_STP 0x0080 /* EEE clock stop supported */ +#define MDIO_USXGMII_EEE 0x0100 /* EEE supported */ +#define MDIO_USXGMII_SPD_MASK 0x0e00 /* USXGMII speed mask */ +#define MDIO_USXGMII_FULL_DUPLEX 0x1000 /* USXGMII full duplex */ +#define MDIO_USXGMII_DPX_SPD_MASK 0x1e00 /* USXGMII duplex and speed bits */ +#define MDIO_USXGMII_10 0x0000 /* 10Mbps */ +#define MDIO_USXGMII_10HALF 0x0000 /* 10Mbps half-duplex */ +#define MDIO_USXGMII_10FULL 0x1000 /* 10Mbps full-duplex */ +#define MDIO_USXGMII_100 0x0200 /* 100Mbps */ +#define MDIO_USXGMII_100HALF 0x0200 /* 100Mbps half-duplex */ +#define MDIO_USXGMII_100FULL 0x1200 /* 100Mbps full-duplex */ +#define MDIO_USXGMII_1000 0x0400 /* 1000Mbps */ +#define MDIO_USXGMII_1000HALF 0x0400 /* 1000Mbps half-duplex */ +#define MDIO_USXGMII_1000FULL 0x1400 /* 1000Mbps full-duplex */ +#define MDIO_USXGMII_10G 0x0600 /* 10Gbps */ +#define MDIO_USXGMII_10GHALF 0x0600 /* 10Gbps half-duplex */ +#define MDIO_USXGMII_10GFULL 0x1600 /* 10Gbps full-duplex */ +#define MDIO_USXGMII_2500 0x0800 /* 2500Mbps */ +#define MDIO_USXGMII_2500HALF 0x0800 /* 2500Mbps half-duplex */ +#define MDIO_USXGMII_2500FULL 0x1800 /* 2500Mbps full-duplex */ +#define MDIO_USXGMII_5000 0x0a00 /* 5000Mbps */ +#define MDIO_USXGMII_5000HALF 0x0a00 /* 5000Mbps half-duplex */ +#define MDIO_USXGMII_5000FULL 0x1a00 /* 5000Mbps full-duplex */ +#define MDIO_USXGMII_LINK 0x8000 /* PHY link with copper-side partner */ + #endif /* _UAPI__LINUX_MDIO_H__ */ -- cgit v1.2.3 From eba75c587e811d3249c8bd50d22bb2266ccd3c0f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 10 Jul 2020 09:29:02 -0400 Subject: icmp: support rfc 4884 Add setsockopt SOL_IP/IP_RECVERR_4884 to return the offset to an extension struct if present. ICMP messages may include an extension structure after the original datagram. RFC 4884 standardized this behavior. It stores the offset in words to the extension header in u8 icmphdr.un.reserved[1]. The field is valid only for ICMP types destination unreachable, time exceeded and parameter problem, if length is at least 128 bytes and entire packet does not exceed 576 bytes. Return the offset to the start of the extension struct when reading an ICMP error from the error queue, if it matches the above constraints. Do not return the raw u8 field. Return the offset from the start of the user buffer, in bytes. The kernel does not return the network and transport headers, so subtract those. Also validate the headers. Return the offset regardless of validation, as an invalid extension must still not be misinterpreted as part of the original datagram. Note that !invalid does not imply valid. If the extension version does not match, no validation can take place, for instance. For backward compatibility, make this optional, set by setsockopt SOL_IP/IP_RECVERR_RFC4884. For API example and feature test, see github.com/wdebruij/kerneltools/blob/master/tests/recv_icmp_v2.c For forward compatibility, reserve only setsockopt value 1, leaving other bits for additional icmp extensions. Changes v1->v2: - convert word offset to byte offset from start of user buffer - return in ee_data as u8 may be insufficient - define extension struct and object header structs - return len only if constraints met - if returning len, also validate Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/errqueue.h | 14 +++++++++++++- include/uapi/linux/icmp.h | 22 ++++++++++++++++++++++ include/uapi/linux/in.h | 1 + 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index ca5cb3e3c6df..3c70e8ac14b8 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -5,6 +5,13 @@ #include #include +/* RFC 4884: return offset to extension struct + validation */ +struct sock_ee_data_rfc4884 { + __u16 len; + __u8 flags; + __u8 reserved; +}; + struct sock_extended_err { __u32 ee_errno; __u8 ee_origin; @@ -12,7 +19,10 @@ struct sock_extended_err { __u8 ee_code; __u8 ee_pad; __u32 ee_info; - __u32 ee_data; + union { + __u32 ee_data; + struct sock_ee_data_rfc4884 ee_rfc4884; + }; }; #define SO_EE_ORIGIN_NONE 0 @@ -31,6 +41,8 @@ struct sock_extended_err { #define SO_EE_CODE_TXTIME_INVALID_PARAM 1 #define SO_EE_CODE_TXTIME_MISSED 2 +#define SO_EE_RFC4884_FLAG_INVALID 1 + /** * struct scm_timestamping - timestamps exposed through cmsg * diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index 5589eeb791ca..fb169a50895e 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -19,6 +19,7 @@ #define _UAPI_LINUX_ICMP_H #include +#include #define ICMP_ECHOREPLY 0 /* Echo Reply */ #define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ @@ -95,5 +96,26 @@ struct icmp_filter { __u32 data; }; +/* RFC 4884 extension struct: one per message */ +struct icmp_ext_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 reserved1:4, + version:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version:4, + reserved1:4; +#else +#error "Please fix " +#endif + __u8 reserved2; + __sum16 checksum; +}; + +/* RFC 4884 extension object header: one for each object */ +struct icmp_extobj_hdr { + __be16 length; + __u8 class_num; + __u8 class_type; +}; #endif /* _UAPI_LINUX_ICMP_H */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 8533bf07450f..3d0d8231dc19 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -123,6 +123,7 @@ struct in_addr { #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 #define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ -- cgit v1.2.3 From f65b71aa25a65e13cf3d10445a48c63d3eeb942e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Jul 2020 01:45:29 +0300 Subject: ptp: add ability to configure duty cycle for periodic output There are external event timestampers (PHCs with support for PTP_EXTTS_REQUEST) that timestamp both event edges. When those edges are very close (such as in the case of a short pulse), there is a chance that the collected timestamp might be of the rising, or of the falling edge, we never know. There are also PHCs capable of generating periodic output with a configurable duty cycle. This is good news, because we can space the rising and falling edge out enough in time, that the risks to overrun the 1-entry timestamp FIFO of the extts PHC are lower (example: the perout PHC can be configured for a period of 1 second, and an "on" time of 0.5 seconds, resulting in a duty cycle of 50%). A flag is introduced for signaling that an on time is present in the perout request structure, for preserving compatibility. Logically speaking, the duty cycle cannot exceed 100% and the PTP core checks for this. PHC drivers that don't support this flag emit a periodic output of an unspecified duty cycle, same as before. The duty cycle is encoded as an "on" time, similar to the "start" and "period" times, and reuses the reserved space while preserving overall binary layout. Pahole reported before: struct ptp_perout_request { struct ptp_clock_time start; /* 0 16 */ struct ptp_clock_time period; /* 16 16 */ unsigned int index; /* 32 4 */ unsigned int flags; /* 36 4 */ unsigned int rsv[4]; /* 40 16 */ /* size: 56, cachelines: 1, members: 5 */ /* last cacheline: 56 bytes */ }; And now: struct ptp_perout_request { struct ptp_clock_time start; /* 0 16 */ struct ptp_clock_time period; /* 16 16 */ unsigned int index; /* 32 4 */ unsigned int flags; /* 36 4 */ union { struct ptp_clock_time on; /* 40 16 */ unsigned int rsv[4]; /* 40 16 */ }; /* 40 16 */ /* size: 56, cachelines: 1, members: 5 */ /* last cacheline: 56 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index ff070aa64278..1d2841155f7d 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -53,12 +53,14 @@ /* * Bits of the ptp_perout_request.flags field: */ -#define PTP_PEROUT_ONE_SHOT (1<<0) +#define PTP_PEROUT_ONE_SHOT (1<<0) +#define PTP_PEROUT_DUTY_CYCLE (1<<1) /* * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl. */ -#define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT) +#define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT | \ + PTP_PEROUT_DUTY_CYCLE) /* * No flags are valid for the original PTP_PEROUT_REQUEST ioctl @@ -105,7 +107,16 @@ struct ptp_perout_request { struct ptp_clock_time period; /* Desired period, zero means disable. */ unsigned int index; /* Which channel to configure. */ unsigned int flags; - unsigned int rsv[4]; /* Reserved for future use. */ + union { + /* + * The "on" time of the signal. + * Must be lower than the period. + * Valid only if (flags & PTP_PEROUT_DUTY_CYCLE) is set. + */ + struct ptp_clock_time on; + /* Reserved for future use. */ + unsigned int rsv[4]; + }; }; #define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement samples. */ -- cgit v1.2.3 From b6bd41363a1ca39282496803cc32f7515ed917fe Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Jul 2020 01:45:30 +0300 Subject: ptp: introduce a phase offset in the periodic output request Some PHCs like the ocelot/felix switch cannot emit generic periodic output, but just PPS (pulse per second) signals, which: - don't start from arbitrary absolute times, but are rather phase-aligned to the beginning of [the closest next] second. - have an optional phase offset relative to that beginning of the second. For those, it was initially established that they should reject any other absolute time for the PTP_PEROUT_REQUEST than 0.000000000 [1]. But when it actually came to writing an application [2] that makes use of this functionality, we realized that we can't really deal generically with PHCs that support absolute start time, and with PHCs that don't, without an explicit interface. Namely, in an ideal world, PHC drivers would ensure that the "perout.start" value written to hardware will result in a functional output. This means that if the PTP time has become in the past of this PHC's current time, it should be automatically fast-forwarded by the driver into a close enough future time that is known to work (note: this is necessary only if the hardware doesn't do this fast-forward by itself). But we don't really know what is the status for PHC drivers in use today, so in the general sense, user space would be risking to have a non-functional periodic output if it simply asked for a start time of 0.000000000. So let's introduce a flag for this type of reduced-functionality hardware, named PTP_PEROUT_PHASE. The start time is just "soon", the only thing we know for sure about this signal is that its rising edge events, Rn, occur at: Rn = perout.phase + n * perout.period The "phase" in the periodic output structure is simply an alias to the "start" time, since both cannot logically be specified at the same time. Therefore, the binary layout of the structure is not affected. [1]: https://patchwork.ozlabs.org/project/netdev/patch/20200320103726.32559-7-yangbo.lu@nxp.com/ [2]: https://www.mail-archive.com/linuxptp-devel@lists.sourceforge.net/msg04142.html Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 1d2841155f7d..1d108d597f66 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -55,12 +55,14 @@ */ #define PTP_PEROUT_ONE_SHOT (1<<0) #define PTP_PEROUT_DUTY_CYCLE (1<<1) +#define PTP_PEROUT_PHASE (1<<2) /* * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl. */ #define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT | \ - PTP_PEROUT_DUTY_CYCLE) + PTP_PEROUT_DUTY_CYCLE | \ + PTP_PEROUT_PHASE) /* * No flags are valid for the original PTP_PEROUT_REQUEST ioctl @@ -103,7 +105,20 @@ struct ptp_extts_request { }; struct ptp_perout_request { - struct ptp_clock_time start; /* Absolute start time. */ + union { + /* + * Absolute start time. + * Valid only if (flags & PTP_PEROUT_PHASE) is unset. + */ + struct ptp_clock_time start; + /* + * Phase offset. The signal should start toggling at an + * unspecified integer multiple of the period, plus this value. + * The start time should be "as soon as possible". + * Valid only if (flags & PTP_PEROUT_PHASE) is set. + */ + struct ptp_clock_time phase; + }; struct ptp_clock_time period; /* Desired period, zero means disable. */ unsigned int index; /* Which channel to configure. */ unsigned int flags; -- cgit v1.2.3 From 6c0246a4588d418f72acd40a7b7601be403d80a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2020 13:11:28 +0800 Subject: perf: Add perf_event_mmap_page::cap_user_time_short ABI In order to support short clock counters, provide an ABI extension. As a whole: u64 time, delta, cyc = read_cycle_counter(); + if (cap_user_time_short) + cyc = time_cycle + ((cyc - time_cycle) & time_mask); delta = mul_u64_u32_shr(cyc, time_mult, time_shift); if (cap_user_time_zero) time = time_zero + delta; delta += time_offset; Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-6-leo.yan@linaro.org Signed-off-by: Will Deacon --- include/uapi/linux/perf_event.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7b2d6fc9e6ed..21a1edd08cbe 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -532,9 +532,10 @@ struct perf_event_mmap_page { cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ - cap_user_time : 1, /* The time_* fields are used */ + cap_user_time : 1, /* The time_{shift,mult,offset} fields are used */ cap_user_time_zero : 1, /* The time_zero field is used */ - cap_____res : 59; + cap_user_time_short : 1, /* the time_{cycle,mask} fields are used */ + cap_____res : 58; }; }; @@ -593,13 +594,29 @@ struct perf_event_mmap_page { * ((rem * time_mult) >> time_shift); */ __u64 time_zero; + __u32 size; /* Header size up to __reserved[] fields. */ + __u32 __reserved_1; + + /* + * If cap_usr_time_short, the hardware clock is less than 64bit wide + * and we must compute the 'cyc' value, as used by cap_usr_time, as: + * + * cyc = time_cycles + ((cyc - time_cycles) & time_mask) + * + * NOTE: this form is explicitly chosen such that cap_usr_time_short + * is a correction on top of cap_usr_time, and code that doesn't + * know about cap_usr_time_short still works under the assumption + * the counter doesn't wrap. + */ + __u64 time_cycles; + __u64 time_mask; /* * Hole for extension of the self monitor capabilities */ - __u8 __reserved[118*8+4]; /* align to 1k. */ + __u8 __reserved[116*8]; /* align to 1k. */ /* * Control data for the mmap() data buffer. -- cgit v1.2.3 From b43870c74f3fdf0cd06bf5f1b7a5ed70a2cd4ed2 Mon Sep 17 00:00:00 2001 From: Max Englander Date: Sat, 4 Jul 2020 15:15:28 +0000 Subject: audit: report audit wait metric in audit status reply In environments where the preservation of audit events and predictable usage of system memory are prioritized, admins may use a combination of --backlog_wait_time and -b options at the risk of degraded performance resulting from backlog waiting. In some cases, this risk may be preferred to lost events or unbounded memory usage. Ideally, this risk can be mitigated by making adjustments when backlog waiting is detected. However, detection can be difficult using the currently available metrics. For example, an admin attempting to debug degraded performance may falsely believe a full backlog indicates backlog waiting. It may turn out the backlog frequently fills up but drains quickly. To make it easier to reliably track degraded performance to backlog waiting, this patch makes the following changes: Add a new field backlog_wait_time_total to the audit status reply. Initialize this field to zero. Add to this field the total time spent by the current task on scheduled timeouts while the backlog limit is exceeded. Reset field to zero upon request via AUDIT_SET. Tested on Ubuntu 18.04 using complementary changes to the audit-userspace and audit-testsuite: - https://github.com/linux-audit/audit-userspace/pull/134 - https://github.com/linux-audit/audit-testsuite/pull/97 Signed-off-by: Max Englander Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 9b6a973f4cc3..cd2d8279a5e4 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -333,14 +333,15 @@ enum { }; /* Status symbols */ - /* Mask values */ -#define AUDIT_STATUS_ENABLED 0x0001 -#define AUDIT_STATUS_FAILURE 0x0002 -#define AUDIT_STATUS_PID 0x0004 + /* Mask values */ +#define AUDIT_STATUS_ENABLED 0x0001 +#define AUDIT_STATUS_FAILURE 0x0002 +#define AUDIT_STATUS_PID 0x0004 #define AUDIT_STATUS_RATE_LIMIT 0x0008 -#define AUDIT_STATUS_BACKLOG_LIMIT 0x0010 -#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020 -#define AUDIT_STATUS_LOST 0x0040 +#define AUDIT_STATUS_BACKLOG_LIMIT 0x0010 +#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020 +#define AUDIT_STATUS_LOST 0x0040 +#define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL 0x0080 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 @@ -467,6 +468,9 @@ struct audit_status { __u32 feature_bitmap; /* bitmap of kernel audit features */ }; __u32 backlog_wait_time;/* message queue wait timeout */ + __u32 backlog_wait_time_actual;/* time spent waiting while + * message limit exceeded + */ }; struct audit_features { -- cgit v1.2.3 From 4787dd582dbde0b7f29eb3dbe59df3da1b350925 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Fri, 17 Jul 2020 08:05:12 +0530 Subject: bareudp: Reverted support to enable & disable rx metadata collection The commit fe80536acf83 ("bareudp: Added attribute to enable & disable rx metadata collection") breaks the the original(5.7) default behavior of bareudp module to collect RX metadadata at the receive. It was added to avoid the crash at the kernel neighbour subsytem when packet with metadata from bareudp is processed. But it is no more needed as the commit 394de110a733 ("net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb") solves this crash. Fixes: fe80536acf83 ("bareudp: Added attribute to enable & disable rx metadata collection") Signed-off-by: Martin Varghese Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 26842ffd0501..af8f31987526 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -601,7 +601,6 @@ enum { IFLA_BAREUDP_ETHERTYPE, IFLA_BAREUDP_SRCPORT_MIN, IFLA_BAREUDP_MULTIPROTO_MODE, - IFLA_BAREUDP_RX_COLLECT_METADATA, __IFLA_BAREUDP_MAX }; -- cgit v1.2.3 From c333f9495c451d958c6f4a41e5de2d8f80f79496 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:37:13 -0700 Subject: raid: md_p.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Cc: Song Liu Cc: linux-raid@vger.kernel.org Signed-off-by: Song Liu --- include/uapi/linux/raid/md_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 1f2d8c81f0e0..e5a98a16f9b0 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -123,7 +123,7 @@ typedef struct mdp_device_descriptor_s { /* * Notes: - * - if an array is being reshaped (restriped) in order to change the + * - if an array is being reshaped (restriped) in order to change * the number of active devices in the array, 'raid_disks' will be * the larger of the old and new numbers. 'delta_disks' will * be the "new - old". So if +ve, raid_disks is the new value, and -- cgit v1.2.3 From 1859f4ebcf2dfe2e878a3805700036fdc4e01e2c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:27:38 -0700 Subject: android: binder.h: drop a duplicated word MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the repeated word "the" in a comment. Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Joel Fernandes Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: devel@driverdev.osuosl.org Acked-by: Christian Brauner Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20200719002738.20210-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 2832134e5397..f1ce2c4c077e 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -404,7 +404,7 @@ enum binder_driver_return_protocol { BR_FAILED_REPLY = _IO('r', 17), /* - * The the last transaction (either a bcTRANSACTION or + * The last transaction (either a bcTRANSACTION or * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory). No parameters. */ }; -- cgit v1.2.3 From 7deff7b5b4395784b194bae3631b8333d3423938 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:28:41 -0700 Subject: hyperv: hyperv.h: drop a duplicated word Drop the repeated word "the" in a comment. Signed-off-by: Randy Dunlap Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: linux-hyperv@vger.kernel.org Link: https://lore.kernel.org/r/20200719002841.20369-1-rdunlap@infradead.org Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 8f24404ad04f..6135d92e0d47 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -219,7 +219,7 @@ struct hv_do_fcopy { * kernel and user-level daemon communicate using a connector channel. * * The user mode component first registers with the - * the kernel component. Subsequently, the kernel component requests, data + * kernel component. Subsequently, the kernel component requests, data * for the specified keys. In response to this message the user mode component * fills in the value corresponding to the specified key. We overload the * sequence field in the cn_msg header to define our KVP message types. -- cgit v1.2.3 From 5923b8f7fa218a9bccd730c0a9692635eb2fc740 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 23 Jul 2020 01:03:01 +0300 Subject: net/sched: cls_flower: Add hash info to flow classification Adding new cls flower keys for hash value and hash mask and dissect the hash info from the skb into the flow key towards flow classication. Signed-off-by: Ariel Levkovich Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7576209d96f9..ee95f42fb0ec 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -578,6 +578,9 @@ enum { TCA_FLOWER_KEY_MPLS_OPTS, + TCA_FLOWER_KEY_HASH, /* u32 */ + TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 01370434df85eb76ecb1527a4466013c4aca2436 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jul 2020 09:03:10 -0400 Subject: icmp6: support rfc 4884 Extend the rfc 4884 read interface introduced for ipv4 in commit eba75c587e81 ("icmp: support rfc 4884") to ipv6. Add socket option SOL_IPV6/IPV6_RECVERR_RFC4884. Changes v1->v2: - make ipv6_icmp_error_rfc4884 static (file scope) Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/icmpv6.h | 1 + include/uapi/linux/in6.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 2622b5a3e616..c1661febc2dc 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -68,6 +68,7 @@ struct icmp6hdr { #define icmp6_mtu icmp6_dataun.un_data32[0] #define icmp6_unused icmp6_dataun.un_data32[0] #define icmp6_maxdelay icmp6_dataun.un_data16[0] +#define icmp6_datagram_len icmp6_dataun.un_data8[0] #define icmp6_router icmp6_dataun.u_nd_advt.router #define icmp6_solicited icmp6_dataun.u_nd_advt.solicited #define icmp6_override icmp6_dataun.u_nd_advt.override diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 9f2273a08356..5ad396a57eb3 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req { #define IPV6_LEAVE_ANYCAST 28 #define IPV6_MULTICAST_ALL 29 #define IPV6_ROUTER_ALERT_ISOLATE 30 +#define IPV6_RECVERR_RFC4884 31 /* IPV6_MTU_DISCOVER values */ #define IPV6_PMTUDISC_DONT 0 -- cgit v1.2.3 From d721a43ff69cd473019c3b77aacb76b09102aca3 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:27 +0800 Subject: bcache: increase super block version for cache device and backing device The new added super block version BCACHE_SB_VERSION_BDEV_WITH_FEATURES (5) BCACHE_SB_VERSION_CDEV_WITH_FEATURES value (6), is for the feature set bits. Devices have super block version equal to the new version will have three new members for feature set bits in the on-disk super block, __le64 feature_compat; __le64 feature_incompat; __le64 feature_ro_compat; They are used for further new features which may introduce on-disk format change, and avoid unncessary super block version increase. The very basic features handling code skeleton is also initialized in this patch. Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/bcache.h | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 9a1965c6c3d0..47df2db2e727 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -141,11 +141,13 @@ static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) * Version 3: Cache device with new UUID format * Version 4: Backing device with data offset */ -#define BCACHE_SB_VERSION_CDEV 0 -#define BCACHE_SB_VERSION_BDEV 1 -#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 -#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 -#define BCACHE_SB_MAX_VERSION 4 +#define BCACHE_SB_VERSION_CDEV 0 +#define BCACHE_SB_VERSION_BDEV 1 +#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 +#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 +#define BCACHE_SB_MAX_VERSION 6 #define SB_SECTOR 8 #define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT) @@ -173,7 +175,12 @@ struct cache_sb_disk { __le64 flags; __le64 seq; - __le64 pad[8]; + + __le64 feature_compat; + __le64 feature_incompat; + __le64 feature_ro_compat; + + __le64 pad[5]; union { struct { @@ -224,7 +231,12 @@ struct cache_sb { __u64 flags; __u64 seq; - __u64 pad[8]; + + __u64 feature_compat; + __u64 feature_incompat; + __u64 feature_ro_compat; + + __u64 pad[5]; union { struct { @@ -262,7 +274,8 @@ struct cache_sb { static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) { return sb->version == BCACHE_SB_VERSION_BDEV - || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; } BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); -- cgit v1.2.3 From 4c1ccd0896d6a45f2159280d957afd441a7aeaba Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:29 +0800 Subject: bcache: struct cache_sb is only for in-memory super block now We have struct cache_sb_disk for on-disk super block already, it is unnecessary to keep the in-memory super block format exactly mapping to the on-disk struct layout. This patch adds code comments to notice that struct cache_sb is not exactly mapping to cache_sb_disk, and removes the useless member csum and pad[5]. Although struct cache_sb does not belong to uapi, but there are still some on-disk format related macros reference it and it is unncessary to get rid of such dependency now. So struct cache_sb will continue to stay in include/uapi/linux/bache.h for now. Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/bcache.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 47df2db2e727..0ef984ea515a 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -215,8 +215,13 @@ struct cache_sb_disk { __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ }; +/* + * This is for in-memory bcache super block. + * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member + * size, ordering and even whole struct size may be different + * from cache_sb_disk. + */ struct cache_sb { - __u64 csum; __u64 offset; /* sector where this sb was written */ __u64 version; @@ -236,8 +241,6 @@ struct cache_sb { __u64 feature_incompat; __u64 feature_ro_compat; - __u64 pad[5]; - union { struct { /* Cache devices */ @@ -245,7 +248,6 @@ struct cache_sb { __u16 block_size; /* sectors */ __u16 bucket_size; /* sectors */ - __u16 nr_in_set; __u16 nr_this_dev; }; -- cgit v1.2.3 From ffa470327572b8f85dceda48fd0676d9658cb8c5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:35 +0800 Subject: bcache: add bucket_size_hi into struct cache_sb_disk for large bucket The large bucket feature is to extend bucket_size from 16bit to 32bit. When create cache device on zoned device (e.g. zoned NVMe SSD), making a single bucket cover one or more zones of the zoned device is the simplest way to support zoned device as cache by bcache. But current maximum bucket size is 16MB and a typical zone size of zoned device is 256MB, this is the major motiviation to extend bucket size to a larger bit width. This patch is the basic and first change to support large bucket size, the major changes it makes are, - Add BCH_FEATURE_INCOMPAT_LARGE_BUCKET for the large bucket feature, INCOMPAT means it introduces incompatible on-disk format change. - Add BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET) routines. - Adds __le16 bucket_size_hi into struct cache_sb_disk at offset 0x8d0 for the on-disk super block format. - For the in-memory super block struct cache_sb, member bucket_size is extended from __u16 to __32. - Add get_bucket_size() to combine the bucket_size and bucket_size_hi from struct cache_sb_disk into an unsigned int value. Since we already have large bucket size helpers meta_bucket_pages(), meta_bucket_bytes() and alloc_meta_bucket_pages(), they make sure when bucket size > 8MB, the memory allocation for bcache meta data bucket won't fail no matter how large the bucket size extended. So these meta data buckets are handled properly when the bucket size width increase from 16bit to 32bit, we don't need to worry about them. Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- include/uapi/linux/bcache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 0ef984ea515a..52e8bcb33981 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -213,6 +213,7 @@ struct cache_sb_disk { __le16 keys; }; __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ + __le16 bucket_size_hi; }; /* @@ -247,9 +248,9 @@ struct cache_sb { __u64 nbuckets; /* device size */ __u16 block_size; /* sectors */ - __u16 bucket_size; /* sectors */ __u16 nr_in_set; __u16 nr_this_dev; + __u32 bucket_size; /* sectors */ }; struct { /* Backing devices */ -- cgit v1.2.3 From 92fe2aa859f52ce6aa595ca97fec110dc7100e63 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:30 -0700 Subject: libnvdimm: Validate command family indices The ND_CMD_CALL format allows for a general passthrough of passlisted commands targeting a given command set. However there is no validation of the family index relative to what the bus supports. - Update the NFIT bus implementation (the only one that supports ND_CMD_CALL passthrough) to also passlist the valid set of command family indices. - Update the generic __nd_ioctl() path to validate that field on behalf of all implementations. Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism") Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- include/uapi/linux/ndctl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 0e09dc5cec19..e9468b9332bd 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -245,6 +245,10 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 #define NVDIMM_FAMILY_PAPR 5 +#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_PAPR + +#define NVDIMM_BUS_FAMILY_NFIT 0 +#define NVDIMM_BUS_FAMILY_MAX NVDIMM_BUS_FAMILY_NFIT #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From 6450ddbd5d8e83ea9927c7f9076a21f829699e0f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:40 -0700 Subject: ACPI: NFIT: Define runtime firmware activation commands Platform reboots are expensive. Towards reducing downtime to apply firmware updates the Intel NVDIMM command definition is growing support for applying live firmware updates that only require temporarily suspending memory traffic instead of a full reboot. Follow-on commits add support for triggering firmware activation, this patch only defines the commands, adds probe support, and validates that they are blocked via the ioctl path. The ioctl-path block ensures that the OS is in charge since these commands have side effects only the OS can handle. Specifically firmware activation may cause the memory controller to be quiesced on the order of 100s of milliseconds. In that case Linux ensure the activation only takes place while the OS is in a suspend state. Link: https://pmem.io/documents/IntelOptanePMem_DSM_Interface-V2.0.pdf Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Cc: "Rafael J. Wysocki" Cc: Len Brown Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- include/uapi/linux/ndctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e9468b9332bd..8cf1e4884fd5 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -248,7 +248,8 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_PAPR #define NVDIMM_BUS_FAMILY_NFIT 0 -#define NVDIMM_BUS_FAMILY_MAX NVDIMM_BUS_FAMILY_NFIT +#define NVDIMM_BUS_FAMILY_INTEL 1 +#define NVDIMM_BUS_FAMILY_MAX NVDIMM_BUS_FAMILY_INTEL #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From a5cbe05a6673b85bed2a63ffcfea6a96c6410cff Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:12 -0700 Subject: bpf: Implement bpf iterator for map elements The bpf iterator for map elements are implemented. The bpf program will receive four parameters: bpf_iter_meta *meta: the meta data bpf_map *map: the bpf_map whose elements are traversed void *key: the key of one element void *value: the value of the same element Here, meta and map pointers are always valid, and key has register type PTR_TO_RDONLY_BUF_OR_NULL and value has register type PTR_TO_RDWR_BUF_OR_NULL. The kernel will track the access range of key and value during verification time. Later, these values will be compared against the values in the actual map to ensure all accesses are within range. A new field iter_seq_info is added to bpf_map_ops which is used to add map type specific information, i.e., seq_ops, init/fini seq_file func and seq_file private data size. Subsequent patches will have actual implementation for bpf_map_ops->iter_seq_info. In user space, BPF_ITER_LINK_MAP_FD needs to be specified in prog attr->link_create.flags, which indicates that attr->link_create.target_fd is a map_fd. The reason for such an explicit flag is for possible future cases where one bpf iterator may allow more than one possible customization, e.g., pid and cgroup id for task_file. Current kernel internal implementation only allows the target to register at most one required bpf_iter_link_info. To support the above case, optional bpf_iter_link_info's are needed, the target can be extended to register such link infos, and user provided link_info needs to match one of target supported ones. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184112.590360-1-yhs@fb.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54d0c886e3ba..828c2f6438f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -246,6 +246,13 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; +enum bpf_iter_link_info { + BPF_ITER_LINK_UNSPEC = 0, + BPF_ITER_LINK_MAP_FD = 1, + + MAX_BPF_ITER_LINK_INFO, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. -- cgit v1.2.3 From aa8d3a716b59db6c1ad6c68fb8aa05e31980da60 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:57 -0700 Subject: bpf, xdp: Add bpf_link-based XDP attachment API Add bpf_link-based API (bpf_xdp_link) to attach BPF XDP program through BPF_LINK_CREATE command. bpf_xdp_link is mutually exclusive with direct BPF program attachment, previous BPF program should be detached prior to attempting to create a new bpf_xdp_link attachment (for a given XDP mode). Once BPF link is attached, it can't be replaced by other BPF program attachment or link attachment. It will be detached only when the last BPF link FD is closed. bpf_xdp_link will be auto-detached when net_device is shutdown, similarly to how other BPF links behave (cgroup, flow_dissector). At that point bpf_link will become defunct, but won't be destroyed until last FD is closed. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-5-andriin@fb.com --- include/uapi/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 828c2f6438f2..87823fb9c123 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,6 +230,7 @@ enum bpf_attach_type { BPF_CGROUP_INET_SOCK_RELEASE, BPF_XDP_CPUMAP, BPF_SK_LOOKUP, + BPF_XDP, __MAX_BPF_ATTACH_TYPE }; @@ -242,6 +243,7 @@ enum bpf_link_type { BPF_LINK_TYPE_CGROUP = 3, BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, + BPF_LINK_TYPE_XDP = 6, MAX_BPF_LINK_TYPE, }; @@ -614,7 +616,10 @@ union bpf_attr { struct { /* struct used by BPF_LINK_CREATE command */ __u32 prog_fd; /* eBPF program to attach */ - __u32 target_fd; /* object to attach to */ + union { + __u32 target_fd; /* object to attach to */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ } link_create; -- cgit v1.2.3 From c1931c9784ebb5787c0784c112fb8baa5e8455b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:59 -0700 Subject: bpf: Implement BPF XDP link-specific introspection APIs Implement XDP link-specific show_fdinfo and link_info to emit ifindex. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-7-andriin@fb.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 87823fb9c123..e1ba4ae6a916 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4069,6 +4069,9 @@ struct bpf_link_info { __u32 netns_ino; __u32 attach_type; } netns; + struct { + __u32 ifindex; + } xdp; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From 1a0c02ba643ed05c07ddf14d87c3bec640666836 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 20 Jul 2020 08:50:58 -0700 Subject: dmaengine: idxd: add missing invalid flags field to completion Add missing "invalid flags" field to completion record struct. Reported-by: Nikhil Rao Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/159526025819.49266.13176787210106133664.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1f412fbf561b..5d07a3ff9e96 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -178,6 +178,12 @@ struct dsa_completion_record { uint32_t bytes_completed; uint64_t fault_addr; union { + /* common record */ + struct { + uint32_t invalid_flags:24; + uint32_t rsvd2:8; + }; + uint16_t delta_rec_size; uint16_t crc_val; -- cgit v1.2.3 From 06f67c47076e5c3ee65276171596479dcc3a3941 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 28 Jun 2020 13:07:14 +0800 Subject: btrfs: use __u16 for the return value of btrfs_qgroup_level() The qgroup level is limited to u16, so no need to use u64 for it. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index a3f3975df0de..9ba64ca6b4ac 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -913,9 +913,9 @@ struct btrfs_free_space_info { #define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) #define BTRFS_QGROUP_LEVEL_SHIFT 48 -static inline __u64 btrfs_qgroup_level(__u64 qgroupid) +static inline __u16 btrfs_qgroup_level(__u64 qgroupid) { - return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT; + return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT); } /* -- cgit v1.2.3 From 137c541821a83debb63b3fa8abdd1cbc41bdf3a1 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 Jul 2020 21:28:58 +0900 Subject: btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl With the recent addition of filesystem checksum types other than CRC32c, it is not anymore hard-coded which checksum type a btrfs filesystem uses. Up to now there is no good way to read the filesystem checksum, apart from reading the filesystem UUID and then query sysfs for the checksum type. Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl command which usually is used to query filesystem features. Also add a flags member indicating that the kernel responded with a set csum_type and csum_size field. For compatibility reasons, only return the csum_type and csum_size if the BTRFS_FS_INFO_FLAG_CSUM_INFO flag was passed to the kernel. Also clear any unknown flags so we don't pass false positives to user-space newer than the kernel. To simplify further additions to the ioctl, also switch the padding to a u8 array. Pahole was used to verify the result of this switch: The csum members are added before flags, which might look odd, but this is to keep the alignment requirements and not to introduce holes in the structure. $ pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko struct btrfs_ioctl_fs_info_args { __u64 max_id; /* 0 8 */ __u64 num_devices; /* 8 8 */ __u8 fsid[16]; /* 16 16 */ __u32 nodesize; /* 32 4 */ __u32 sectorsize; /* 36 4 */ __u32 clone_alignment; /* 40 4 */ __u16 csum_type; /* 44 2 */ __u16 csum_size; /* 46 2 */ __u64 flags; /* 48 8 */ __u8 reserved[968]; /* 56 968 */ /* size: 1024, cachelines: 16, members: 10 */ }; Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms") Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm") CC: stable@vger.kernel.org # 5.5+ Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index e6b6cb0f8bc6..24f6848ad78e 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -243,6 +243,13 @@ struct btrfs_ioctl_dev_info_args { __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ }; +/* + * Retrieve information about the filesystem + */ + +/* Request information about checksum type and size */ +#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0) + struct btrfs_ioctl_fs_info_args { __u64 max_id; /* out */ __u64 num_devices; /* out */ @@ -250,8 +257,11 @@ struct btrfs_ioctl_fs_info_args { __u32 nodesize; /* out */ __u32 sectorsize; /* out */ __u32 clone_alignment; /* out */ - __u32 reserved32; - __u64 reserved[122]; /* pad to 1k */ + /* See BTRFS_FS_INFO_FLAG_* */ + __u16 csum_type; /* out */ + __u16 csum_size; /* out */ + __u64 flags; /* in/out */ + __u8 reserved[968]; /* pad to 1k */ }; /* -- cgit v1.2.3 From 0fb408a558aadbaa58beb75b02c95741e1fbb514 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 Jul 2020 21:28:59 +0900 Subject: btrfs: add filesystem generation to FS_INFO ioctl Add retrieval of the filesystem's generation to the fsinfo ioctl. This is driven by setting the BTRFS_FS_INFO_FLAG_GENERATION flag in btrfs_ioctl_fs_info_args::flags. Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 24f6848ad78e..9b82e01c191d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -250,6 +250,9 @@ struct btrfs_ioctl_dev_info_args { /* Request information about checksum type and size */ #define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0) +/* Request information about filesystem generation */ +#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1) + struct btrfs_ioctl_fs_info_args { __u64 max_id; /* out */ __u64 num_devices; /* out */ @@ -261,7 +264,8 @@ struct btrfs_ioctl_fs_info_args { __u16 csum_type; /* out */ __u16 csum_size; /* out */ __u64 flags; /* in/out */ - __u8 reserved[968]; /* pad to 1k */ + __u64 generation; /* out */ + __u8 reserved[960]; /* pad to 1k */ }; /* -- cgit v1.2.3 From 49bac897683340457a0ab1f76b924a1220bdb604 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 Jul 2020 21:29:00 +0900 Subject: btrfs: add metadata_uuid to FS_INFO ioctl Add retrieval of the filesystem's metadata UUID to the fsinfo ioctl. This is driven by setting the BTRFS_FS_INFO_FLAG_METADATA_UUID flag in btrfs_ioctl_fs_info_args::flags. Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 9b82e01c191d..2c39d15a2beb 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -252,6 +252,8 @@ struct btrfs_ioctl_dev_info_args { /* Request information about filesystem generation */ #define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1) +/* Request information about filesystem metadata UUID */ +#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2) struct btrfs_ioctl_fs_info_args { __u64 max_id; /* out */ @@ -265,7 +267,8 @@ struct btrfs_ioctl_fs_info_args { __u16 csum_size; /* out */ __u64 flags; /* in/out */ __u64 generation; /* out */ - __u8 reserved[960]; /* pad to 1k */ + __u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */ + __u8 reserved[944]; /* pad to 1k */ }; /* -- cgit v1.2.3 From 50c8a002bfd43798768ad07833b2b9d4b4d5274f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:29:03 -0700 Subject: platform/x86: ISST: drop a duplicated word in isst_if.h Drop the repeated word "for" in a comment. Signed-off-by: Randy Dunlap Cc: Srinivas Pandruvada Cc: platform-driver-x86@vger.kernel.org Cc: Darren Hart Cc: Andy Shevchenko Acked-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- include/uapi/linux/isst_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index 0a52b7b093d3..ba078f8e9add 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -69,7 +69,7 @@ struct isst_if_cpu_maps { * @logical_cpu: Logical CPU number to get target PCI device. * @reg: PUNIT register offset * @value: For write operation value to write and for - * for read placeholder read value + * read placeholder read value * * Structure to specify read/write data to PUNIT registers. */ -- cgit v1.2.3 From 1e6b57d6421f0343dd11619612e5ff8930cddf38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Jun 2020 11:11:32 -0400 Subject: unexport linux/elfcore.h It's unusable from userland - it uses elf_gregset_t, which is not provided by exported headers. glibc has it in sys/procfs.h, but the same file defines struct elf_prstatus, so linux/elfcore.h can't be included once sys/procfs.h has been pulled. Same goes for uclibc and dietlibc simply doesn't have elf_gregset_t defined anywhere. IOW, no userland source is including that thing. Signed-off-by: Al Viro --- include/uapi/linux/elfcore.h | 101 ------------------------------------------- 1 file changed, 101 deletions(-) delete mode 100644 include/uapi/linux/elfcore.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elfcore.h b/include/uapi/linux/elfcore.h deleted file mode 100644 index baf03562306d..000000000000 --- a/include/uapi/linux/elfcore.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_ELFCORE_H -#define _UAPI_LINUX_ELFCORE_H - -#include -#include -#include -#include -#include -#include - -struct elf_siginfo -{ - int si_signo; /* signal number */ - int si_code; /* extra code */ - int si_errno; /* errno */ -}; - - -#ifndef __KERNEL__ -typedef elf_greg_t greg_t; -typedef elf_gregset_t gregset_t; -typedef elf_fpregset_t fpregset_t; -typedef elf_fpxregset_t fpxregset_t; -#define NGREG ELF_NGREG -#endif - -/* - * Definitions to generate Intel SVR4-like core files. - * These mostly have the same names as the SVR4 types with "elf_" - * tacked on the front to prevent clashes with linux definitions, - * and the typedef forms have been avoided. This is mostly like - * the SVR4 structure, but more Linuxy, with things that Linux does - * not support and which gdb doesn't really use excluded. - * Fields present but not used are marked with "XXX". - */ -struct elf_prstatus -{ -#if 0 - long pr_flags; /* XXX Process flags */ - short pr_why; /* XXX Reason for process halt */ - short pr_what; /* XXX More detailed reason */ -#endif - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - unsigned long pr_sigpend; /* Set of pending signals */ - unsigned long pr_sighold; /* Set of held signals */ -#if 0 - struct sigaltstack pr_altstack; /* Alternate stack info */ - struct sigaction pr_action; /* Signal action for current sig */ -#endif - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct __kernel_old_timeval pr_utime; /* User time */ - struct __kernel_old_timeval pr_stime; /* System time */ - struct __kernel_old_timeval pr_cutime; /* Cumulative user time */ - struct __kernel_old_timeval pr_cstime; /* Cumulative system time */ -#if 0 - long pr_instr; /* Current instruction */ -#endif - elf_gregset_t pr_reg; /* GP registers */ -#ifdef CONFIG_BINFMT_ELF_FDPIC - /* When using FDPIC, the loadmap addresses need to be communicated - * to GDB in order for GDB to do the necessary relocations. The - * fields (below) used to communicate this information are placed - * immediately after ``pr_reg'', so that the loadmap addresses may - * be viewed as part of the register set if so desired. - */ - unsigned long pr_exec_fdpic_loadmap; - unsigned long pr_interp_fdpic_loadmap; -#endif - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define ELF_PRARGSZ (80) /* Number of chars for args */ - -struct elf_prpsinfo -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - unsigned long pr_flag; /* flags */ - __kernel_uid_t pr_uid; - __kernel_gid_t pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - -#ifndef __KERNEL__ -typedef struct elf_prstatus prstatus_t; -typedef struct elf_prpsinfo prpsinfo_t; -#define PRARGSZ ELF_PRARGSZ -#endif - - -#endif /* _UAPI_LINUX_ELFCORE_H */ -- cgit v1.2.3 From 8f4c0e01789c18674acdf17cae3822b3dc3db715 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:16 -0400 Subject: hsr: enhance netlink socket interface to support PRP Parallel Redundancy Protocol (PRP) is another redundancy protocol introduced by IEC 63439 standard. It is similar to HSR in many aspects:- - Use a pair of Ethernet interfaces to created the PRP device - Use a 6 byte redundancy protocol part (RCT, Redundancy Check Trailer) similar to HSR Tag. - Has Link Redundancy Entity (LRE) that works with RCT to implement redundancy. Key difference is that the protocol unit is a trailer instead of a prefix as in HSR. That makes it inter-operable with tradition network components such as bridges/switches which treat it as pad bytes, whereas HSR nodes requires some kind of translators (Called redbox) to talk to regular network devices. This features allows regular linux box to be converted to a DAN-P box. DAN-P stands for Dual Attached Node - PRP similar to DAN-H (Dual Attached Node - HSR). Add a comment at the header/source code to explicitly state that the driver files also handles PRP protocol as well. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- include/uapi/linux/hsr_netlink.h | 2 +- include/uapi/linux/if_link.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hsr_netlink.h b/include/uapi/linux/hsr_netlink.h index c218ef9c35dd..d540ea9bbef4 100644 --- a/include/uapi/linux/hsr_netlink.h +++ b/include/uapi/linux/hsr_netlink.h @@ -17,7 +17,7 @@ /* Generic Netlink HSR family definition */ -/* attributes */ +/* attributes for HSR or PRP node */ enum { HSR_A_UNSPEC, HSR_A_NODE_ADDR, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index af8f31987526..63af64646358 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -907,7 +907,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -917,6 +924,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; -- cgit v1.2.3 From 08b95c338e0c5a96e47f4ca314ea1e7580ecb5d7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Jul 2020 14:11:52 +0300 Subject: fanotify: remove event FAN_DIR_MODIFY It was never enabled in uapi and its functionality is about to be superseded by events FAN_CREATE, FAN_DELETE, FAN_MOVE with group flag FAN_REPORT_NAME. Keep a place holder variable name_event instead of removing the name recording code since it will be used by the new events. Link: https://lore.kernel.org/r/20200708111156.24659-17-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index a88c7c6d0692..7f2f17eacbf9 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -24,7 +24,6 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ -#define FAN_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -- cgit v1.2.3 From 83b7a59896dd24015a34b7f00027f0ff3747972f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:26 +0300 Subject: fanotify: add basic support for FAN_REPORT_DIR_FID For now, the flag is mutually exclusive with FAN_REPORT_FID. Events include a single info record of type FAN_EVENT_INFO_TYPE_DFID with a directory file handle. For now, events are only reported for: - Directory modification events - Events on children of a watching directory - Events on directory objects Soon, we will add support for reporting the parent directory fid for events on non-directories with filesystem/mount mark and support for reporting both parent directory fid and child fid. Link: https://lore.kernel.org/r/20200716084230.30611-19-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 7f2f17eacbf9..21afebf77fd7 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -53,6 +53,7 @@ /* Flags to determine fanotify event format */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ @@ -117,6 +118,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_FID 1 #define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -126,10 +128,11 @@ struct fanotify_event_info_header { }; /* - * Unique file identifier info record. This is used both for - * FAN_EVENT_INFO_TYPE_FID records and for FAN_EVENT_INFO_TYPE_DFID_NAME - * records. For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null - * terminated name immediately after the file handle. + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. */ struct fanotify_event_info_fid { struct fanotify_event_info_header hdr; -- cgit v1.2.3 From 929943b38daf817f2e6d303ea04401651fc3bc05 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:28 +0300 Subject: fanotify: add support for FAN_REPORT_NAME Introduce a new fanotify_init() flag FAN_REPORT_NAME. It requires the flag FAN_REPORT_DIR_FID and there is a constant for setting both flags named FAN_REPORT_DFID_NAME. For a group with flag FAN_REPORT_NAME, the parent fid and name are reported for directory entry modification events (create/detete/move) and for events on non-directory objects. Events on directories themselves are reported with their own fid and "." as the name. The parent fid and name are reported with an info record of type FAN_EVENT_INFO_TYPE_DFID_NAME, similar to the way that parent fid is reported with into type FAN_EVENT_INFO_TYPE_DFID, but with an appended null terminated name string. Link: https://lore.kernel.org/r/20200716084230.30611-21-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 21afebf77fd7..fbf9c5c7dd59 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -54,6 +54,10 @@ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ -- cgit v1.2.3 From e1613b5714ee6c186c9628e9958edf65e9d9cddd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Jul 2020 15:47:15 -0700 Subject: bpf: Fix bpf_ringbuf_output() signature to return long Due to bpf tree fix merge, bpf_ringbuf_output() signature ended up with int as a return type, while all other helpers got converted to returning long. So fix it in bpf-next now. Fixes: b0659d8a950d ("bpf: Fix definition of bpf_ringbuf_output() helper in UAPI comments") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200727224715.652037-1-andriin@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e1ba4ae6a916..eb5e0c38eb2c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3241,7 +3241,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification -- cgit v1.2.3 From bc2d214af5dbcf1e53be9a23d95905a585657ff4 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Sun, 26 Jul 2020 17:35:09 +0200 Subject: scsi: target: tcmu: Implement tmr_notify callback This patch implements the tmr_notify callback for tcmu. When the callback is called, tcmu checks the list of aborted commands it received as parameter: - aborted commands in the qfull_queue are removed from the queue and target_complete_command is called - from the cmd_ids of aborted commands currently uncompleted in cmd ring it creates a list of aborted cmd_ids. Finally a TMR notification is written to cmd ring containing TMR type and cmd_id list. If there is no space in ring, the TMR notification is queued on a TMR specific queue. The TMR specific queue 'tmr_queue' can be seen as a extension of the cmd ring. At the end of each iexecution of tcmu_complete_commands() we check whether tmr_queue contains TMRs and try to move them onto the ring. If tmr_queue is not empty after that, we don't call run_qfull_queue() because commands must not overtake TMRs. This way we guarantee that cmd_ids in TMR notification received by userspace either match an active, not yet completed command or are no longer valid due to userspace having complete some cmd_ids meanwhile. New commands that were assigned to an aborted cmd_id will always appear on the cmd ring _after_ the TMR. Link: https://lore.kernel.org/r/20200726153510.13077-8-bstroesser@ts.fujitsu.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- include/uapi/linux/target_core_user.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index b7b57967d90f..95b1597f16ae 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -45,6 +45,7 @@ #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ #define TCMU_MAILBOX_FLAG_CAP_OOOC (1 << 0) /* Out-of-order completions */ #define TCMU_MAILBOX_FLAG_CAP_READ_LEN (1 << 1) /* Read data length */ +#define TCMU_MAILBOX_FLAG_CAP_TMR (1 << 2) /* TMR notifications */ struct tcmu_mailbox { __u16 version; @@ -62,6 +63,7 @@ struct tcmu_mailbox { enum tcmu_opcode { TCMU_OP_PAD = 0, TCMU_OP_CMD, + TCMU_OP_TMR, }; /* @@ -128,6 +130,29 @@ struct tcmu_cmd_entry { } __packed; +struct tcmu_tmr_entry { + struct tcmu_cmd_entry_hdr hdr; + +#define TCMU_TMR_UNKNOWN 0 +#define TCMU_TMR_ABORT_TASK 1 +#define TCMU_TMR_ABORT_TASK_SET 2 +#define TCMU_TMR_CLEAR_ACA 3 +#define TCMU_TMR_CLEAR_TASK_SET 4 +#define TCMU_TMR_LUN_RESET 5 +#define TCMU_TMR_TARGET_WARM_RESET 6 +#define TCMU_TMR_TARGET_COLD_RESET 7 +/* Pseudo reset due to received PR OUT */ +#define TCMU_TMR_LUN_RESET_PRO 128 + __u8 tmr_type; + + __u8 __pad1; + __u16 __pad2; + __u32 cmd_cnt; + __u64 __pad3; + __u64 __pad4; + __u16 cmd_ids[0]; +} __packed; + #define TCMU_OP_ALIGN_SIZE sizeof(__u64) enum tcmu_genl_cmd { -- cgit v1.2.3 From 50935339c394adfb3d7253055e3bc10ee70264b0 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Sat, 25 Jul 2020 19:02:25 +0200 Subject: netfilter: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_connmark.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index 1aa5c955ee1e..f01c19b83a2b 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -4,7 +4,7 @@ #include -/* Copyright (C) 2002,2004 MARA Systems AB +/* Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3 From df78a0c0b67de58934877aad61e0431a2bd0caf1 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 1 Jun 2020 23:22:47 -0700 Subject: nl80211: S1G band and channel definitions Gives drivers the definitions needed to advertise support for S1G bands. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200602062247.23212-1-thomas@adapt-ip.com Link: https://lore.kernel.org/r/20200731055636.795173-1-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4e6339ab1fce..ad183469f9af 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4437,6 +4437,11 @@ enum nl80211_key_mode { * attribute must be provided as well * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel + * @NL80211_CHAN_WIDTH_1: 1 MHz OFDM channel + * @NL80211_CHAN_WIDTH_2: 2 MHz OFDM channel + * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel + * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel + * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel */ enum nl80211_chan_width { NL80211_CHAN_WIDTH_20_NOHT, @@ -4447,6 +4452,11 @@ enum nl80211_chan_width { NL80211_CHAN_WIDTH_160, NL80211_CHAN_WIDTH_5, NL80211_CHAN_WIDTH_10, + NL80211_CHAN_WIDTH_1, + NL80211_CHAN_WIDTH_2, + NL80211_CHAN_WIDTH_4, + NL80211_CHAN_WIDTH_8, + NL80211_CHAN_WIDTH_16, }; /** @@ -4457,11 +4467,15 @@ enum nl80211_chan_width { * @NL80211_BSS_CHAN_WIDTH_20: control channel is 20 MHz wide or compatible * @NL80211_BSS_CHAN_WIDTH_10: control channel is 10 MHz wide * @NL80211_BSS_CHAN_WIDTH_5: control channel is 5 MHz wide + * @NL80211_BSS_CHAN_WIDTH_1: control channel is 1 MHz wide + * @NL80211_BSS_CHAN_WIDTH_2: control channel is 2 MHz wide */ enum nl80211_bss_scan_width { NL80211_BSS_CHAN_WIDTH_20, NL80211_BSS_CHAN_WIDTH_10, NL80211_BSS_CHAN_WIDTH_5, + NL80211_BSS_CHAN_WIDTH_1, + NL80211_BSS_CHAN_WIDTH_2, }; /** @@ -4740,6 +4754,7 @@ enum nl80211_txrate_gi { * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz) + * @NL80211_BAND_S1GHZ: around 900MHz, supported by S1G PHYs * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace * since newer kernel versions may support more bands */ @@ -4748,6 +4763,7 @@ enum nl80211_band { NL80211_BAND_5GHZ, NL80211_BAND_60GHZ, NL80211_BAND_6GHZ, + NL80211_BAND_S1GHZ, NUM_NL80211_BANDS, }; -- cgit v1.2.3 From 987021726f9f41a1daf335c57cd7b6261109cdb2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:21 -0700 Subject: net/wireless: nl80211.h: drop duplicate words in comments Drop doubled words in several comments. Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-1-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ad183469f9af..f47a7a8d0216 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -363,7 +363,7 @@ * @NL80211_CMD_SET_STATION: Set station attributes for station identified by * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_NEW_STATION: Add a station with given attributes to the - * the interface identified by %NL80211_ATTR_IFINDEX. + * interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC * or, if no MAC address given, all stations, on the interface identified * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and @@ -383,7 +383,7 @@ * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by * %NL80211_ATTR_MAC. * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the - * the interface identified by %NL80211_ATTR_IFINDEX. + * interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC * or, if no MAC address given, all mesh paths, on the interface identified * by %NL80211_ATTR_IFINDEX. @@ -934,7 +934,7 @@ * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules. * * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the - * the new channel information (Channel Switch Announcement - CSA) + * new channel information (Channel Switch Announcement - CSA) * in the beacon for some time (as defined in the * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the * new channel. Userspace provides the new channel information (using @@ -1113,7 +1113,7 @@ * randomization may be enabled and configured by specifying the * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. - * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in + * A u64 cookie for further %NL80211_ATTR_COOKIE use is returned in * the netlink extended ack message. * * To cancel a measurement, close the socket that requested it. @@ -1511,7 +1511,7 @@ enum nl80211_commands { * rates as defined by IEEE 802.11 7.3.2.2 but without the length * restriction (at most %NL80211_MAX_SUPP_RATES). * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station - * to, or the AP interface the station was originally added to to. + * to, or the AP interface the station was originally added to. * @NL80211_ATTR_STA_INFO: information about a station, part of station info * given for %NL80211_CMD_GET_STATION, nested attribute containing * info as possible, see &enum nl80211_sta_info. @@ -2084,7 +2084,7 @@ enum nl80211_commands { * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels. * * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported - * supported operating classes. + * operating classes. * * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space * controls DFS operation in IBSS mode. If the flag is included in @@ -2395,7 +2395,7 @@ enum nl80211_commands { * nl80211_txq_stats) * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy. * The smaller of this and the memory limit is enforced. - * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory memory limit (in bytes) for the + * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory limit (in bytes) for the * TXQ queues for this phy. The smaller of this and the packet limit is * enforced. * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes @@ -5652,7 +5652,7 @@ enum nl80211_feature_flags { * enum nl80211_ext_feature_index - bit index of extended features. * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates. * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can - * can request to use RRM (see %NL80211_ATTR_USE_RRM) with + * request to use RRM (see %NL80211_ATTR_USE_RRM) with * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set * the ASSOC_REQ_USE_RRM flag in the association request even if * NL80211_FEATURE_QUIET is not advertized. @@ -6061,7 +6061,7 @@ enum nl80211_dfs_state { }; /** - * enum enum nl80211_protocol_features - nl80211 protocol features + * enum nl80211_protocol_features - nl80211 protocol features * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting * wiphy dumps (if requested by the application with the attribute * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the -- cgit v1.2.3 From 0f55c0c500f2bbfc5cc5590cdf6973b3f64dc195 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:22 -0700 Subject: net/wireless: wireless.h: drop duplicate word in comments Drop doubled word "threshold" in a comment. Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-2-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/uapi/linux/wireless.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index 24f3371ad826..08967b3f19c8 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -914,7 +914,7 @@ union iwreq_data { struct iw_param sens; /* signal level threshold */ struct iw_param bitrate; /* default bit rate */ struct iw_param txpower; /* default transmit power */ - struct iw_param rts; /* RTS threshold threshold */ + struct iw_param rts; /* RTS threshold */ struct iw_param frag; /* Fragmentation threshold */ __u32 mode; /* Operation mode */ struct iw_param retry; /* Retry limits & lifetime */ -- cgit v1.2.3 From e3718a611470d311a92c60d4eb535270b49a7108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 17 Jun 2020 09:30:33 +0200 Subject: cfg80211/mac80211: add mesh_param "mesh_nolearn" to skip path discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, before being able to forward a packet between two 802.11s nodes, both a PLINK handshake is performed upon receiving a beacon and then later a PREQ/PREP exchange for path discovery is performed on demand upon receiving a data frame to forward. When running a mesh protocol on top of an 802.11s interface, like batman-adv, we do not need the multi-hop mesh routing capabilities of 802.11s and usually set mesh_fwding=0. However, even with mesh_fwding=0 the PREQ/PREP path discovery is still performed on demand. Even though in this scenario the next hop PREQ/PREP will determine is always the direct 11s neighbor node. The new mesh_nolearn parameter allows to skip the PREQ/PREP exchange in this scenario, leading to a reduced delay, reduced packet buffering and simplifies HWMP in general. mesh_nolearn is still rather conservative in that if the packet destination is not a direct 11s neighbor, it will fall back to PREQ/PREP path discovery. For normal, multi-hop 802.11s mesh routing it is usually not advisable to enable mesh_nolearn as a transmission to a direct but distant neighbor might be worse than reaching that same node via a more robust / higher throughput etc. multi-hop path. Cc: Sven Eckelmann Cc: Simon Wunderlich Signed-off-by: Linus Lüssing Link: https://lore.kernel.org/r/20200617073034.26149-1-linus.luessing@c0d3.blue [fix nl80211 policy to range 0/1 only] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f47a7a8d0216..a83d8faf88ac 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4236,6 +4236,12 @@ enum nl80211_mesh_power_mode { * field. If left unset then the mesh formation field will only * advertise such if there is an active root mesh path. * + * @NL80211_MESHCONF_NOLEARN: Try to avoid multi-hop path discovery (e.g. + * PREQ/PREP for HWMP) if the destination is a direct neighbor. Note that + * this might not be the optimal decision as a multi-hop route might be + * better. So if using this setting you will likely also want to disable + * dot11MeshForwarding and use another mesh routing protocol on top. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -4269,6 +4275,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_AWAKE_WINDOW, NL80211_MESHCONF_PLINK_TIMEOUT, NL80211_MESHCONF_CONNECTED_TO_GATE, + NL80211_MESHCONF_NOLEARN, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 184eebe664f0e11c485f6d309fe56297b3f75e9e Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 11 Jun 2020 16:02:37 +0200 Subject: cfg80211/mac80211: add connected to auth server to meshconf Besides information about num of peerings and gate connectivity, the mesh formation byte also contains a flag for authentication server connectivity, that currently cannot be set in the mesh conf. This patch adds this capability, which is necessary to implement 802.1X authentication in mesh mode. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200611140238.427461-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a83d8faf88ac..f1770e3756f4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4242,6 +4242,10 @@ enum nl80211_mesh_power_mode { * better. So if using this setting you will likely also want to disable * dot11MeshForwarding and use another mesh routing protocol on top. * + * @NL80211_MESHCONF_CONNECTED_TO_AS: If set to true then this mesh STA + * will advertise that it is connected to a authentication server + * in the mesh formation field. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -4276,6 +4280,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_PLINK_TIMEOUT, NL80211_MESHCONF_CONNECTED_TO_GATE, NL80211_MESHCONF_NOLEARN, + NL80211_MESHCONF_CONNECTED_TO_AS, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 1303a51c24100b3b1915d6f9072fe5ae5bb4c5f6 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 11 Jun 2020 16:02:38 +0200 Subject: cfg80211/mac80211: add connected to auth server to station info This patch adds the necessary bits to later query the auth server flag for every peer from iw. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200611140238.427461-2-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f1770e3756f4..d6b6599a6001 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3370,6 +3370,8 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds) * of STA's association + * @NL80211_STA_INFO_CONNECTED_TO_AS: set to true if STA has a path to a + * authentication server (u8, 0 or 1) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3417,6 +3419,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_AIRTIME_WEIGHT, NL80211_STA_INFO_AIRTIME_LINK_METRIC, NL80211_STA_INFO_ASSOC_AT_BOOTTIME, + NL80211_STA_INFO_CONNECTED_TO_AS, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From fd17dba1c860d39f655a3a08387c21e3ceca8c55 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 20 Jul 2020 13:12:25 +0530 Subject: cfg80211: Add support to advertize OCV support Add a new feature flag that drivers can use to advertize support for Operating Channel Validation (OCV) when using driver's SME for RSNA handshakes. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200720074225.8990-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d6b6599a6001..a3ae2b060a55 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5804,6 +5804,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS: The driver * can report tx status for control port over nl80211 tx operations. * + * @NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION: Driver supports Operating + * Channel Validation (OCV) when using driver's SME for RSNA handshakes. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5859,6 +5862,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT, NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, + NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From f96622749a67d40ad5efe8a58d5fc95313097aa0 Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Tue, 23 Jun 2020 08:49:35 -0500 Subject: nl80211: support 4-way handshake offloading for WPA/WPA2-PSK in AP mode Let drivers advertise support for AP-mode WPA/WPA2-PSK 4-way handshake offloading with a new NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK flag. Extend use of NL80211_ATTR_PMK attribute indicating it might be passed as part of NL80211_CMD_START_AP command, and contain the PSK (which is the PMK, hence the name). The driver is assumed to handle the 4-way handshake by itself in this case, instead of relying on userspace. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Link: https://lore.kernel.org/r/20200623134938.39997-2-chi-hsien.lin@cypress.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a3ae2b060a55..631f3a997b3c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -183,18 +183,27 @@ * * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers * can indicate they support offloading EAPOL handshakes for WPA/WPA2 - * preshared key authentication. In %NL80211_CMD_CONNECT the preshared - * key should be specified using %NL80211_ATTR_PMK. Drivers supporting - * this offload may reject the %NL80211_CMD_CONNECT when no preshared - * key material is provided, for example when that driver does not - * support setting the temporal keys through %CMD_NEW_KEY. + * preshared key authentication in station mode. In %NL80211_CMD_CONNECT + * the preshared key should be specified using %NL80211_ATTR_PMK. Drivers + * supporting this offload may reject the %NL80211_CMD_CONNECT when no + * preshared key material is provided, for example when that driver does + * not support setting the temporal keys through %NL80211_CMD_NEW_KEY. * * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be * set by drivers indicating offload support of the PTK/GTK EAPOL - * handshakes during 802.1X authentication. In order to use the offload - * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS - * attribute flag. Drivers supporting this offload may reject the - * %NL80211_CMD_CONNECT when the attribute flag is not present. + * handshakes during 802.1X authentication in station mode. In order to + * use the offload the %NL80211_CMD_CONNECT should have + * %NL80211_ATTR_WANT_1X_4WAY_HS attribute flag. Drivers supporting this + * offload may reject the %NL80211_CMD_CONNECT when the attribute flag is + * not present. + * + * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK flag drivers + * can indicate they support offloading EAPOL handshakes for WPA/WPA2 + * preshared key authentication in AP mode. In %NL80211_CMD_START_AP + * the preshared key should be specified using %NL80211_ATTR_PMK. Drivers + * supporting this offload may reject the %NL80211_CMD_START_AP when no + * preshared key material is provided, for example when that driver does + * not support setting the temporal keys through %NL80211_CMD_NEW_KEY. * * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK * using %NL80211_CMD_SET_PMK. For offloaded FT support also @@ -2362,10 +2371,11 @@ enum nl80211_commands { * * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID. - * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way - * handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is - * used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute - * specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well. + * For %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP it is used to provide + * PSK for offloading 4-way handshake for WPA/WPA2-PSK networks. For 802.1X + * authentication it is used with %NL80211_CMD_SET_PMK. For offloaded FT + * support this attribute specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME + * is included as well. * * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to * indicate that it supports multiple active scheduled scan requests. @@ -5807,6 +5817,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION: Driver supports Operating * Channel Validation (OCV) when using driver's SME for RSNA handshakes. * + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK: Device wants to do 4-way + * handshake with PSK in AP mode (PSK is passed as part of the start AP + * command). + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5863,6 +5877,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 48040793fa6003d211f021c6ad273477bcd90d91 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 30 Jul 2020 15:44:40 -0700 Subject: tcp: add earliest departure time to SCM_TIMESTAMPING_OPT_STATS This change adds TCP_NLA_EDT to SCM_TIMESTAMPING_OPT_STATS that reports the earliest departure time(EDT) of the timestamped skb. By tracking EDT values of the skb from different timestamps, we can observe when and how much the value changed. This allows to measure the precise delay injected on the sender host e.g. by a bpf-base throttler. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index f2acb2566333..cfcb10b75483 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -313,6 +313,7 @@ enum { TCP_NLA_SRTT, /* smoothed RTT in usecs */ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ + TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 829eb208e80d6db95c0201cb8fa00c2f9ad87faf Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 31 Jul 2020 17:34:01 -0700 Subject: rtnetlink: add support for protodown reason netdev protodown is a mechanism that allows protocols to hold an interface down. It was initially introduced in the kernel to hold links down by a multihoming protocol. There was also an attempt to introduce protodown reason at the time but was rejected. protodown and protodown reason is supported by almost every switching and routing platform. It was ok for a while to live without a protodown reason. But, its become more critical now given more than one protocol may need to keep a link down on a system at the same time. eg: vrrp peer node, port security, multihoming protocol. Its common for Network operators and protocol developers to look for such a reason on a networking box (Its also known as errDisable by most networking operators) This patch adds support for link protodown reason attribute. There are two ways to maintain protodown reasons. (a) enumerate every possible reason code in kernel - A protocol developer has to make a request and have that appear in a certain kernel version (b) provide the bits in the kernel, and allow user-space (sysadmin or NOS distributions) to manage the bit-to-reasonname map. - This makes extending reason codes easier (kind of like the iproute2 table to vrf-name map /etc/iproute2/rt_tables.d/) This patch takes approach (b). a few things about the patch: - It treats the protodown reason bits as counter to indicate active protodown users - Since protodown attribute is already an exposed UAPI, the reason is not enforced on a protodown set. Its a no-op if not used. the patch follows the below algorithm: - presence of reason bits set indicates protodown is in use - user can set protodown and protodown reason in a single or multiple setlink operations - setlink operation to clear protodown, will return -EBUSY if there are active protodown reason bits - reason is not included in link dumps if not used example with patched iproute2: $cat /etc/iproute2/protodown_reasons.d/r.conf 0 mlag 1 evpn 2 vrrp 3 psecurity $ip link set dev vxlan0 protodown on protodown_reason vrrp on $ip link set dev vxlan0 protodown_reason mlag on $ip link show 14: vxlan0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether f6:06:be:17:91:e7 brd ff:ff:ff:ff:ff:ff protodown on $ip link set dev vxlan0 protodown_reason mlag off $ip link set dev vxlan0 protodown off protodown_reason vrrp off Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 63af64646358..7fba4de511de 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -170,12 +170,22 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) -- cgit v1.2.3 From 73b11c2ab072d5b0599d1e12cc126f55ee306daf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Jul 2020 11:28:26 -0700 Subject: bpf: Add support for forced LINK_DETACH command Add LINK_DETACH command to force-detach bpf_link without destroying it. It has the same behavior as auto-detaching of bpf_link due to cgroup dying for bpf_cgroup_link or net_device being destroyed for bpf_xdp_link. In such case, bpf_link is still a valid kernel object, but is defuncts and doesn't hold BPF program attached to corresponding BPF hook. This functionality allows users with enough access rights to manually force-detach attached bpf_link without killing respective owner process. This patch implements LINK_DETACH for cgroup, xdp, and netns links, mostly re-using existing link release handling code. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200731182830.286260-2-andriin@fb.com --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eb5e0c38eb2c..b134e679e9db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -117,6 +117,7 @@ enum bpf_cmd { BPF_LINK_GET_NEXT_ID, BPF_ENABLE_STATS, BPF_ITER_CREATE, + BPF_LINK_DETACH, }; enum bpf_map_type { @@ -634,6 +635,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { + __u32 link_fd; + } link_detach; + struct { /* struct used by BPF_ENABLE_STATS command */ __u32 type; } enable_stats; -- cgit v1.2.3 From 321bd212619a7269308696e4ddc446930ea73fad Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Jun 2020 18:24:33 -0400 Subject: virtio: VIRTIO_F_IOMMU_PLATFORM -> VIRTIO_F_ACCESS_PLATFORM Rename the bit to match latest virtio spec. Add a compat macro to avoid breaking existing userspace. Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand --- include/uapi/linux/virtio_config.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index ff8e7dc9d4dd..b5eda06f0d57 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -67,13 +67,17 @@ #define VIRTIO_F_VERSION_1 32 /* - * If clear - device has the IOMMU bypass quirk feature. - * If set - use platform tools to detect the IOMMU. + * If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + * If set - use platform DMA tools to access the memory. * * Note the reverse polarity (compared to most other features), * this is for compatibility with legacy systems. */ -#define VIRTIO_F_IOMMU_PLATFORM 33 +#define VIRTIO_F_ACCESS_PLATFORM 33 +#ifndef __KERNEL__ +/* Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) */ +#define VIRTIO_F_IOMMU_PLATFORM VIRTIO_F_ACCESS_PLATFORM +#endif /* __KERNEL__ */ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 -- cgit v1.2.3 From 9d2f627b7ec9d5d3246b6cec17f290ee6778c83b Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 31 Jul 2020 14:20:56 +0200 Subject: net: openvswitch: add masks cache hit counter Add a counter that counts the number of masks cache hits, and export it through the megaflow netlink statistics. Reviewed-by: Paolo Abeni Reviewed-by: Tonghao Zhang Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 9b14519e74d9..7cb76e5ca7cf 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -102,8 +102,8 @@ struct ovs_dp_megaflow_stats { __u64 n_mask_hit; /* Number of masks used for flow lookups. */ __u32 n_masks; /* Number of masks for the datapath. */ __u32 pad0; /* Pad for future expension. */ + __u64 n_cache_hit; /* Number of cache matches for flow lookups. */ __u64 pad1; /* Pad for future expension. */ - __u64 pad2; /* Pad for future expension. */ }; struct ovs_vport_stats { -- cgit v1.2.3 From 9bf24f594c6acf676fb8c229f152c21bfb915ddb Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 31 Jul 2020 14:21:34 +0200 Subject: net: openvswitch: make masks cache size configurable This patch makes the masks cache size configurable, or with a size of 0, disable it. Reviewed-by: Paolo Abeni Reviewed-by: Tonghao Zhang Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7cb76e5ca7cf..8300cc29dec8 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -86,6 +86,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, + OVS_DP_ATTR_MASKS_CACHE_SIZE, __OVS_DP_ATTR_MAX }; -- cgit v1.2.3 From 88fab21c691bb1ff164e540735237a385e3afeaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ioana-Ruxandra=20St=C4=83ncioi?= Date: Mon, 3 Aug 2020 07:33:33 +0000 Subject: seg6_iptunnel: Refactor seg6_lwt_headroom out of uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the function seg6_lwt_headroom out of the seg6_iptunnel.h uapi header, because it is only used in seg6_iptunnel.c. Moreover, it is only used in the kernel code, as indicated by the "#ifdef __KERNEL__". Suggested-by: David Miller Signed-off-by: Ioana-Ruxandra Stăncioi Signed-off-by: David S. Miller --- include/uapi/linux/seg6_iptunnel.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index 09fb608a35ec..eb815e0d0ac3 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -37,25 +37,4 @@ enum { SEG6_IPTUN_MODE_L2ENCAP, }; -#ifdef __KERNEL__ - -static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) -{ - int head = 0; - - switch (tuninfo->mode) { - case SEG6_IPTUN_MODE_INLINE: - break; - case SEG6_IPTUN_MODE_ENCAP: - head = sizeof(struct ipv6hdr); - break; - case SEG6_IPTUN_MODE_L2ENCAP: - return 0; - } - - return ((tuninfo->srh->hdrlen + 1) << 3) + head; -} - -#endif - #endif -- cgit v1.2.3 From 4476770881d7ac647e3bcae0943f37e00b9c3f3c Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Wed, 29 Jul 2020 10:40:00 -0700 Subject: remoteproc: Add remoteproc character device interface Add the character device interface into remoteproc framework. This interface can be used in order to boot/shutdown remote subsystems and provides a basic ioctl based interface to implement supplementary functionality. An ioctl call is implemented to enable the shutdown on release feature which will allow remote processors to be shutdown when the controlling userspace application crashes or hangs. Reviewed-by: Bjorn Andersson Reviewed-by: Mathieu Poirier Signed-off-by: Rishabh Bhatnagar Signed-off-by: Siddharth Gupta Link: https://lore.kernel.org/r/1596044401-22083-2-git-send-email-sidgup@codeaurora.org [bjorn: s/int32_t/s32/ per checkpatch] Signed-off-by: Bjorn Andersson --- include/uapi/linux/remoteproc_cdev.h | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/uapi/linux/remoteproc_cdev.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/remoteproc_cdev.h b/include/uapi/linux/remoteproc_cdev.h new file mode 100644 index 000000000000..c43768e4b0dc --- /dev/null +++ b/include/uapi/linux/remoteproc_cdev.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * IOCTLs for Remoteproc's character device interface. + * + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _UAPI_REMOTEPROC_CDEV_H_ +#define _UAPI_REMOTEPROC_CDEV_H_ + +#include +#include + +#define RPROC_MAGIC 0xB7 + +/* + * The RPROC_SET_SHUTDOWN_ON_RELEASE ioctl allows to enable/disable the shutdown of a remote + * processor automatically when the controlling userpsace closes the char device interface. + * + * input parameter: integer + * 0 : disable automatic shutdown + * other : enable automatic shutdown + */ +#define RPROC_SET_SHUTDOWN_ON_RELEASE _IOW(RPROC_MAGIC, 1, __s32) + +/* + * The RPROC_GET_SHUTDOWN_ON_RELEASE ioctl gets information about whether the automatic shutdown of + * a remote processor is enabled or disabled when the controlling userspace closes the char device + * interface. + * + * output parameter: integer + * 0 : automatic shutdown disable + * other : automatic shutdown enable + */ +#define RPROC_GET_SHUTDOWN_ON_RELEASE _IOR(RPROC_MAGIC, 2, __s32) + +#endif -- cgit v1.2.3 From cae19a6386c86dec8ec2810a96d7497a2eec8d38 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_9p: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_9p.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_9p.h b/include/uapi/linux/virtio_9p.h index 277c4ad44e84..441047432258 100644 --- a/include/uapi/linux/virtio_9p.h +++ b/include/uapi/linux/virtio_9p.h @@ -25,7 +25,7 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#include +#include #include #include @@ -36,7 +36,7 @@ struct virtio_9p_config { /* length of the tag name */ - __u16 tag_len; + __virtio16 tag_len; /* non-NULL terminated tag name */ __u8 tag[0]; } __attribute__((packed)); -- cgit v1.2.3 From c73cb10cc4421baa669c1edd8211086280273216 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_balloon: correct tags for config space fields Tag config space fields as having little endian-ness. Note that balloon is special: LE even when using the legacy interface. Signed-off-by: Michael S. Tsirkin Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_balloon.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index dc3e656470dd..ddaa45e723c4 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -45,20 +45,20 @@ #define VIRTIO_BALLOON_CMD_ID_DONE 1 struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ - __u32 num_pages; + __le32 num_pages; /* Number of pages we've actually got in balloon. */ - __u32 actual; + __le32 actual; /* * Free page hint command id, readonly by guest. * Was previously named free_page_report_cmd_id so we * need to carry that name for legacy support. */ union { - __u32 free_page_hint_cmd_id; - __u32 free_page_report_cmd_id; /* deprecated */ + __le32 free_page_hint_cmd_id; + __le32 free_page_report_cmd_id; /* deprecated */ }; /* Stores PAGE_POISON if page poisoning is in use */ - __u32 poison_val; + __le32 poison_val; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ -- cgit v1.2.3 From 40e04c488bd6ab1859778d40bf9bb3ca294ab97b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_blk: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefano Garzarella Reviewed-by: Stefano Garzarella --- include/uapi/linux/virtio_blk.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 0f99d7b49ede..d888f013d9ff 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -57,20 +57,20 @@ struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ - __u64 capacity; + __virtio64 capacity; /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ - __u32 size_max; + __virtio32 size_max; /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ - __u32 seg_max; + __virtio32 seg_max; /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */ struct virtio_blk_geometry { - __u16 cylinders; + __virtio16 cylinders; __u8 heads; __u8 sectors; } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ - __u32 blk_size; + __virtio32 blk_size; /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */ /* exponent for physical block per logical block. */ @@ -78,42 +78,42 @@ struct virtio_blk_config { /* alignment offset in logical blocks. */ __u8 alignment_offset; /* minimum I/O size without performance penalty in logical blocks. */ - __u16 min_io_size; + __virtio16 min_io_size; /* optimal sustained I/O size in logical blocks. */ - __u32 opt_io_size; + __virtio32 opt_io_size; /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ __u8 wce; __u8 unused; /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ - __u16 num_queues; + __virtio16 num_queues; /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ /* * The maximum discard sectors (in 512-byte sectors) for * one segment. */ - __u32 max_discard_sectors; + __virtio32 max_discard_sectors; /* * The maximum number of discard segments in a * discard command. */ - __u32 max_discard_seg; + __virtio32 max_discard_seg; /* Discard commands must be aligned to this number of sectors. */ - __u32 discard_sector_alignment; + __virtio32 discard_sector_alignment; /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ /* * The maximum number of write zeroes sectors (in 512-byte sectors) in * one segment. */ - __u32 max_write_zeroes_sectors; + __virtio32 max_write_zeroes_sectors; /* * The maximum number of segments in a write zeroes * command. */ - __u32 max_write_zeroes_seg; + __virtio32 max_write_zeroes_seg; /* * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the * deallocation of one or more of the sectors. -- cgit v1.2.3 From dbe2dc8c5838ef415620a4fe691570b062ab046f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_console: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_console.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_console.h b/include/uapi/linux/virtio_console.h index b7fb108c9310..7e6ec2ff0560 100644 --- a/include/uapi/linux/virtio_console.h +++ b/include/uapi/linux/virtio_console.h @@ -45,13 +45,13 @@ struct virtio_console_config { /* colums of the screens */ - __u16 cols; + __virtio16 cols; /* rows of the screens */ - __u16 rows; + __virtio16 rows; /* max. number of ports this device can hold */ - __u32 max_nr_ports; + __virtio32 max_nr_ports; /* emergency write register */ - __u32 emerg_wr; + __virtio32 emerg_wr; } __attribute__((packed)); /* -- cgit v1.2.3 From 24bcf35b695ef5228f3035eb979cc2de571d560b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_crypto: correct tags for config space fields Since crypto is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_crypto.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 50cdc8aebfcf..a03932f10565 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -414,33 +414,33 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_config { /* See VIRTIO_CRYPTO_OP_* above */ - __u32 status; + __le32 status; /* * Maximum number of data queue */ - __u32 max_dataqueues; + __le32 max_dataqueues; /* * Specifies the services mask which the device support, * see VIRTIO_CRYPTO_SERVICE_* above */ - __u32 crypto_services; + __le32 crypto_services; /* Detailed algorithms mask */ - __u32 cipher_algo_l; - __u32 cipher_algo_h; - __u32 hash_algo; - __u32 mac_algo_l; - __u32 mac_algo_h; - __u32 aead_algo; + __le32 cipher_algo_l; + __le32 cipher_algo_h; + __le32 hash_algo; + __le32 mac_algo_l; + __le32 mac_algo_h; + __le32 aead_algo; /* Maximum length of cipher key */ - __u32 max_cipher_key_len; + __le32 max_cipher_key_len; /* Maximum length of authenticated key */ - __u32 max_auth_key_len; - __u32 reserve; + __le32 max_auth_key_len; + __le32 reserve; /* Maximum size of each crypto request's content */ - __u64 max_size; + __le64 max_size; }; struct virtio_crypto_inhdr { -- cgit v1.2.3 From fc4a1accbb4ef372bb55b7ab161cf88e3b631935 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_fs: correct tags for config space fields Since fs is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Acked-by: Vivek Goyal Acked-by: Vivek Goyal Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h index b02eb2ac3d99..3056b6e9f8ce 100644 --- a/include/uapi/linux/virtio_fs.h +++ b/include/uapi/linux/virtio_fs.h @@ -13,7 +13,7 @@ struct virtio_fs_config { __u8 tag[36]; /* Number of request queues */ - __u32 num_request_queues; + __le32 num_request_queues; } __attribute__((packed)); #endif /* _UAPI_LINUX_VIRTIO_FS_H */ -- cgit v1.2.3 From f378444b7c97e39358de5d50d01fb0e92f259073 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_gpu: correct tags for config space fields Since gpu is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_gpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..ccbd174ef321 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -320,10 +320,10 @@ struct virtio_gpu_resp_edid { #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) struct virtio_gpu_config { - __u32 events_read; - __u32 events_clear; - __u32 num_scanouts; - __u32 num_capsets; + __le32 events_read; + __le32 events_clear; + __le32 num_scanouts; + __le32 num_capsets; }; /* simple formats for fbcon/X use */ -- cgit v1.2.3 From 924b59a6dfa85dc9eac4c7f2fe1857bba2cb2510 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_input: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Gerd Hoffmann Reviewed-by: Gerd Hoffmann Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_input.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h index a7fe5c8fb135..52084b1fb965 100644 --- a/include/uapi/linux/virtio_input.h +++ b/include/uapi/linux/virtio_input.h @@ -40,18 +40,18 @@ enum virtio_input_config_select { }; struct virtio_input_absinfo { - __u32 min; - __u32 max; - __u32 fuzz; - __u32 flat; - __u32 res; + __le32 min; + __le32 max; + __le32 fuzz; + __le32 flat; + __le32 res; }; struct virtio_input_devids { - __u16 bustype; - __u16 vendor; - __u16 product; - __u16 version; + __le16 bustype; + __le16 vendor; + __le16 product; + __le16 version; }; struct virtio_input_config { -- cgit v1.2.3 From 0ebcffcc2731682777bab19b51a512d8f31e1bdd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_iommu: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jean-Philippe Brucker Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_iommu.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 48e3c29223b5..237e36a280cb 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -18,24 +18,24 @@ #define VIRTIO_IOMMU_F_MMIO 5 struct virtio_iommu_range_64 { - __u64 start; - __u64 end; + __le64 start; + __le64 end; }; struct virtio_iommu_range_32 { - __u32 start; - __u32 end; + __le32 start; + __le32 end; }; struct virtio_iommu_config { /* Supported page sizes */ - __u64 page_size_mask; + __le64 page_size_mask; /* Supported IOVA range */ struct virtio_iommu_range_64 input_range; /* Max domain ID size */ struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ - __u32 probe_size; + __le32 probe_size; }; /* Request types */ -- cgit v1.2.3 From 79268954424771185fb4ca304786dd561a272246 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_mem: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. TODO: check other uses of __virtioXX types in this header, should probably be __leXX. Signed-off-by: Michael S. Tsirkin Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_mem.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index a9ffe041843c..70e01c687d5e 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -185,27 +185,27 @@ struct virtio_mem_resp { struct virtio_mem_config { /* Block size and alignment. Cannot change. */ - __u64 block_size; + __le64 block_size; /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ - __u16 node_id; + __le16 node_id; __u8 padding[6]; /* Start address of the memory region. Cannot change. */ - __u64 addr; + __le64 addr; /* Region size (maximum). Cannot change. */ - __u64 region_size; + __le64 region_size; /* * Currently usable region size. Can grow up to region_size. Can * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config * update will be sent). */ - __u64 usable_region_size; + __le64 usable_region_size; /* * Currently used size. Changes due to plug/unplug requests, but no * config updates will be sent. */ - __u64 plugged_size; + __le64 plugged_size; /* Requested size. New plug requests cannot exceed it. Can change. */ - __u64 requested_size; + __le64 requested_size; }; #endif /* _LINUX_VIRTIO_MEM_H */ -- cgit v1.2.3 From 577e677a785357542311a645eeb1756cd83988be Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_net: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 19d23e5baa4e..27d996f29dd1 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -87,19 +87,19 @@ struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[ETH_ALEN]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ - __u16 status; + __virtio16 status; /* Maximum number of each of transmit and receive queues; * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. * Legal values are between 1 and 0x8000 */ - __u16 max_virtqueue_pairs; + __virtio16 max_virtqueue_pairs; /* Default maximum transmit unit advice */ - __u16 mtu; + __virtio16 mtu; /* * speed, in units of 1Mb. All values 0 to INT_MAX are legal. * Any other value stands for unknown. */ - __u32 speed; + __virtio32 speed; /* * 0x00 - half duplex * 0x01 - full duplex -- cgit v1.2.3 From a28feb855cc0ad452acd3dfe2b8f2841927da5f1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_pmem: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_pmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h index b022787ffb94..d676b3620383 100644 --- a/include/uapi/linux/virtio_pmem.h +++ b/include/uapi/linux/virtio_pmem.h @@ -15,8 +15,8 @@ #include struct virtio_pmem_config { - __u64 start; - __u64 size; + __le64 start; + __le64 size; }; #define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 -- cgit v1.2.3 From 965b5350514b597dc6347b733127e180844aeb43 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: virtio_scsi: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_scsi.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h index cc18ef8825c0..0abaae4027c0 100644 --- a/include/uapi/linux/virtio_scsi.h +++ b/include/uapi/linux/virtio_scsi.h @@ -103,16 +103,16 @@ struct virtio_scsi_event { } __attribute__((packed)); struct virtio_scsi_config { - __u32 num_queues; - __u32 seg_max; - __u32 max_sectors; - __u32 cmd_per_lun; - __u32 event_info_size; - __u32 sense_size; - __u32 cdb_size; - __u16 max_channel; - __u16 max_target; - __u32 max_lun; + __virtio32 num_queues; + __virtio32 seg_max; + __virtio32 max_sectors; + __virtio32 cmd_per_lun; + __virtio32 event_info_size; + __virtio32 sense_size; + __virtio32 cdb_size; + __virtio16 max_channel; + __virtio16 max_target; + __virtio32 max_lun; } __attribute__((packed)); /* Feature Bits */ -- cgit v1.2.3 From 64ffa39dc860fb9772225c694353f73eca5801c6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: virtio_net: use LE accessors for speed/duplex Speed and duplex config fields depend on VIRTIO_NET_F_SPEED_DUPLEX which being 63>31 depends on VIRTIO_F_VERSION_1. Accordingly, use LE accessors for these fields. Reported-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 27d996f29dd1..3f55a4215f11 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -99,7 +99,7 @@ struct virtio_net_config { * speed, in units of 1Mb. All values 0 to INT_MAX are legal. * Any other value stands for unknown. */ - __virtio32 speed; + __le32 speed; /* * 0x00 - half duplex * 0x01 - full duplex -- cgit v1.2.3 From 25abc060d282132ea5c945392f900dca0a7e9bbb Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 4 Aug 2020 19:20:40 +0300 Subject: vhost-vdpa: support IOTLB batching hints This patches extend the vhost IOTLB API to accept batch updating hints form userspace. When userspace wants update the device IOTLB in a batch, it may do: 1) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_BEGIN flag 2) Perform a batch of IOTLB updating via VHOST_IOTLB_UPDATE/INVALIDATE 3) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_END flag Vhost-vdpa may decide to batch the IOMMU/IOTLB updating in step 3 when vDPA device support set_map() ops. This is useful for the vDPA device that want to know all the mappings to tweak their own DMA translation logic. For vDPA device that doesn't require set_map(), no behavior changes. This capability is advertised via VHOST_BACKEND_F_IOTLB_BATCH capability. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-5-eli@mellanox.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 2 ++ include/uapi/linux/vhost_types.h | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 0c2349612e77..75232185324a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -91,6 +91,8 @@ /* Use message type V2 */ #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 669457ce5c48..9a269a88a6ff 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -60,6 +60,17 @@ struct vhost_iotlb_msg { #define VHOST_IOTLB_UPDATE 2 #define VHOST_IOTLB_INVALIDATE 3 #define VHOST_IOTLB_ACCESS_FAIL 4 +/* + * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying + * multiple mappings in one go: beginning with + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of + * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END. + * When one of these two values is used as the message type, the rest + * of the fields in the message are ignored. There's no guarantee that + * these changes take place automatically in the device. + */ +#define VHOST_IOTLB_BATCH_BEGIN 5 +#define VHOST_IOTLB_BATCH_END 6 __u8 type; }; -- cgit v1.2.3 From 5e7b30205cef80f6bb922e61834437ca7bff5837 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:56 -0700 Subject: bpf: Change uapi for bpf iterator map elements Commit a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") added bpf iterator support for map elements. The map element bpf iterator requires info to identify a particular map. In the above commit, the attr->link_create.target_fd is used to carry map_fd and an enum bpf_iter_link_info is added to uapi to specify the target_fd actually representing a map_fd: enum bpf_iter_link_info { BPF_ITER_LINK_UNSPEC = 0, BPF_ITER_LINK_MAP_FD = 1, MAX_BPF_ITER_LINK_INFO, }; This is an extensible approach as we can grow enumerator for pid, cgroup_id, etc. and we can unionize target_fd for pid, cgroup_id, etc. But in the future, there are chances that more complex customization may happen, e.g., for tasks, it could be filtered based on both cgroup_id and user_id. This patch changed the uapi to have fields __aligned_u64 iter_info; __u32 iter_info_len; for additional iter_info for link_create. The iter_info is defined as union bpf_iter_link_info { struct { __u32 map_fd; } map; }; So future extension for additional customization will be easier. The bpf_iter_link_info will be passed to target callback to validate and generic bpf_iter framework does not need to deal it any more. Note that map_fd = 0 will be considered invalid and -EBADF will be returned to user space. Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com --- include/uapi/linux/bpf.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ -- cgit v1.2.3 From 7f317d34906c1033f0752fc137dda04e43979bb8 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 11 Aug 2020 18:34:19 -0700 Subject: include/: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Link: http://lkml.kernel.org/r/20200726110117.16346-1-grandmaster@al2klimov.de Signed-off-by: Linus Torvalds --- include/uapi/linux/elf.h | 2 +- include/uapi/linux/map_to_7segment.h | 2 +- include/uapi/linux/types.h | 2 +- include/uapi/linux/usb/ch9.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c6dd0215482e..22220945a5fd 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -53,7 +53,7 @@ typedef __s64 Elf64_Sxword; * * - Oracle: Linker and Libraries. * Part No: 817–1984–19, August 2011. - * http://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf * * - System V ABI AMD64 Architecture Processor Supplement * Draft Version 0.99.4, diff --git a/include/uapi/linux/map_to_7segment.h b/include/uapi/linux/map_to_7segment.h index f9ed18134b83..13a06e5e966e 100644 --- a/include/uapi/linux/map_to_7segment.h +++ b/include/uapi/linux/map_to_7segment.h @@ -24,7 +24,7 @@ * of (ASCII) characters to a 7-segments notation. * * The 7 segment's wikipedia notation below is used as standard. - * See: http://en.wikipedia.org/wiki/Seven_segment_display + * See: https://en.wikipedia.org/wiki/Seven_segment_display * * Notation: +-a-+ * f b diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 2fce8b6876e9..f6d2f83cbe29 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -7,7 +7,7 @@ #ifndef __ASSEMBLY__ #ifndef __KERNEL__ #ifndef __EXPORTED_HEADERS__ -#warning "Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders" +#warning "Attempt to use kernel headers from user space, see https://kernelnewbies.org/KernelHeaders" #endif /* __EXPORTED_HEADERS__ */ #endif diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 48766fdf6580..0f865ae4ba89 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -1229,7 +1229,7 @@ struct usb_set_sel_req { * As per USB compliance update, a device that is actively drawing * more than 100mA from USB must report itself as bus-powered in * the GetStatus(DEVICE) call. - * http://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 + * https://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 */ #define USB_SELF_POWER_VBUS_MAX_DRAW 100 -- cgit v1.2.3 From 2fb3244f0a58ceb3d866ac63f644dfa31cae430f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:35:21 -0700 Subject: autofs: fix doubled word Change doubled word "is" to "it is". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Ian Kent Link: http://lkml.kernel.org/r/5a82befd-40f8-8dc0-3498-cbc0436cad9b@infradead.org Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_dev-ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 374742651c30..62e625356dc8 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -82,7 +82,7 @@ struct args_ismountpoint { /* * All the ioctls use this structure. * When sending a path size must account for the total length - * of the chunk of memory otherwise is is the size of the + * of the chunk of memory otherwise it is the size of the * structure. */ -- cgit v1.2.3 From 004a01241c5a0d375266ebf1c72f208de99294e9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:04 +0200 Subject: arm64/x86: KVM: Introduce steal-time cap arm64 requires a vcpu fd (KVM_HAS_DEVICE_ATTR vcpu ioctl) to probe support for steal-time. However this is unnecessary, as only a KVM fd is required, and it complicates userspace (userspace may prefer delaying vcpu creation until after feature probing). Introduce a cap that can be checked instead. While x86 can already probe steal-time support with a kvm fd (KVM_GET_SUPPORTED_CPUID), we add the cap there too for consistency. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200804170604.42662-7-drjones@redhat.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f6d86033c4fa..3d8023474f2a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1035,6 +1035,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_LAST_CPU 184 #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 +#define KVM_CAP_STEAL_TIME 187 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From da9125df854ea48a6240c66e8a67be06e2c12c03 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Aug 2020 14:12:55 +0200 Subject: netfilter: nf_tables: incorrect enum nft_list_attributes definition This should be NFTA_LIST_UNSPEC instead of NFTA_LIST_UNPEC, all other similar attribute definitions are postfixed with _UNSPEC. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 42f351c1f5c5..2b8e12f7a4a6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -133,7 +133,7 @@ enum nf_tables_msg_types { * @NFTA_LIST_ELEM: list element (NLA_NESTED) */ enum nft_list_attributes { - NFTA_LIST_UNPEC, + NFTA_LIST_UNSPEC, NFTA_LIST_ELEM, __NFTA_LIST_MAX }; -- cgit v1.2.3 From b16fc097bc283184cde40e5b30d15705e1590410 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Aug 2020 15:36:42 +0200 Subject: bpf: Fix two typos in uapi/linux/bpf.h Also remove trailing whitespaces in bpf_skb_get_tunnel_key example code. Signed-off-by: Tobias Klauser Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821133642.18870-1-tklauser@distanz.ch --- include/uapi/linux/bpf.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0480f893facd..b6238b2209b7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. -- cgit v1.2.3 From 645f08975f49441b3e753d8dc5b740cbcb226594 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 27 Aug 2020 07:27:49 -0400 Subject: net: Fix some comments Fix some comments, including wrong function name, duplicated word and so on. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 3d0d8231dc19..7d6687618d80 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -135,7 +135,7 @@ struct in_addr { * this socket to prevent accepting spoofed ones. */ #define IP_PMTUDISC_INTERFACE 4 -/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get +/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get * fragmented if they exeed the interface mtu */ #define IP_PMTUDISC_OMIT 5 -- cgit v1.2.3 From 15e9e35cd1dec2bc138464de6bf8ef828df19235 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 10 Sep 2020 18:33:51 +0800 Subject: KVM: MIPS: Change the definition of kvm type MIPS defines two kvm types: #define KVM_VM_MIPS_TE 0 #define KVM_VM_MIPS_VZ 1 In Documentation/virt/kvm/api.rst it is said that "You probably want to use 0 as machine type", which implies that type 0 be the "automatic" or "default" type. And, in user-space libvirt use the null-machine (with type 0) to detect the kvm capability, which returns "KVM not supported" on a VZ platform. I try to fix it in QEMU but it is ugly: https://lists.nongnu.org/archive/html/qemu-devel/2020-08/msg05629.html And Thomas Huth suggests me to change the definition of kvm type: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg03281.html So I define like this: #define KVM_VM_MIPS_AUTO 0 #define KVM_VM_MIPS_VZ 1 #define KVM_VM_MIPS_TE 2 Since VZ and TE cannot co-exists, using type 0 on a TE platform will still return success (so old user-space tools have no problems on new kernels); the advantage is that using type 0 on a VZ platform will not return failure. So, the only problem is "new user-space tools use type 2 on old kernels", but if we treat this as a kernel bug, we can backport this patch to old stable kernels. Signed-off-by: Huacai Chen Message-Id: <1599734031-28746-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3d8023474f2a..7d8eced6f459 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -790,9 +790,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 -/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ -#define KVM_VM_MIPS_TE 0 +/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */ +#define KVM_VM_MIPS_AUTO 0 #define KVM_VM_MIPS_VZ 1 +#define KVM_VM_MIPS_TE 2 #define KVM_S390_SIE_PAGE_OFFSET 1 -- cgit v1.2.3 From 129134e5415d46f38b9978b3809af94ed649b57d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 11 Sep 2020 05:07:58 +0200 Subject: media: media/v4l2: remove V4L2_FLAG_MEMORY_NON_CONSISTENT flag The patch partially reverts some of the UAPI bits of the buffer cache management hints. Namely, the queue consistency (memory coherency) user-space hint because, as it turned out, the kernel implementation of this feature was misusing DMA_ATTR_NON_CONSISTENT. The patch reverts both kernel and user space parts: removes the DMA consistency attr functions, rolls back changes to v4l2_requestbuffers, v4l2_create_buffers structures and corresponding UAPI functions (plus compat32 layer) and cleans up the documentation. [hverkuil: fixed a few typos in the commit log] [hverkuil: fixed vb2_core_reqbufs call in drivers/media/dvb-core/dvb_vb2.c] [mchehab: fixed a typo in the commit log: revers->reverts] Signed-off-by: Christoph Hellwig Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c7b70ff53bc1..235db7754606 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -191,8 +191,6 @@ enum v4l2_memory { V4L2_MEMORY_DMABUF = 4, }; -#define V4L2_FLAG_MEMORY_NON_CONSISTENT (1 << 0) - /* see also http://vektor.theorem.ca/graphics/ycbcr/ */ enum v4l2_colorspace { /* @@ -949,10 +947,7 @@ struct v4l2_requestbuffers { __u32 type; /* enum v4l2_buf_type */ __u32 memory; /* enum v4l2_memory */ __u32 capabilities; - union { - __u32 flags; - __u32 reserved[1]; - }; + __u32 reserved[1]; }; /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */ @@ -2456,9 +2451,6 @@ struct v4l2_dbg_chip_info { * @memory: enum v4l2_memory; buffer memory type * @format: frame format, for which buffers are requested * @capabilities: capabilities of this buffer type. - * @flags: additional buffer management attributes (ignored unless the - * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability - * and configured for MMAP streaming I/O). * @reserved: future extensions */ struct v4l2_create_buffers { @@ -2467,8 +2459,7 @@ struct v4l2_create_buffers { __u32 memory; struct v4l2_format format; __u32 capabilities; - __u32 flags; - __u32 reserved[6]; + __u32 reserved[7]; }; /* -- cgit v1.2.3 From 19a83d36f9837e8bd27435ebb31564a717a5d15a Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 17 Sep 2020 01:04:10 +0200 Subject: ethtool: add and use message type for tunnel info reply Tunnel offload info code uses ETHTOOL_MSG_TUNNEL_INFO_GET message type (cmd field in genetlink header) for replies to tunnel info netlink request, i.e. the same value as the request have. This is a problem because we are using two separate enums for userspace to kernel and kernel to userspace message types so that this ETHTOOL_MSG_TUNNEL_INFO_GET (28) collides with ETHTOOL_MSG_CABLE_TEST_TDR_NTF which is what message type 28 means for kernel to userspace messages. As the tunnel info request reached mainline in 5.9 merge window, we should still be able to fix the reply message type without breaking backward compatibility. Fixes: c7d759eb7b12 ("ethtool: add tunnel info interface") Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5dcd24cb33ea..72ba36be9655 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -79,6 +79,7 @@ enum { ETHTOOL_MSG_TSINFO_GET_REPLY, ETHTOOL_MSG_CABLE_TEST_NTF, ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, -- cgit v1.2.3 From ad2b9b0f8d0107602bdc1f3b1ab90719842ace11 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Thu, 24 Sep 2020 15:23:14 -0700 Subject: tcp: skip DSACKs with dubious sequence ranges Currently, we use length of DSACKed range to compute number of delivered packets. And if sequence range in DSACK is corrupted, we can get bogus dsacked/acked count, and bogus cwnd. This patch put bounds on DSACKed range to skip update of data delivery and spurious retransmission information, if the DSACK is unlikely caused by sender's action: - DSACKed range shouldn't be greater than maximum advertised rwnd. - Total no. of DSACKed segments shouldn't be greater than total no. of retransmitted segs. Unlike spurious retransmits, network duplicates or corrupted DSACKs shouldn't be counted as delivery. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index cee9f8e6fce3..f84e7bcad6de 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -288,6 +288,7 @@ enum LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ + LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From 2d914c1bf079491d1113051a7232250267f3f2e4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 30 Sep 2020 21:27:18 +0100 Subject: rxrpc: Fix accept on a connection that need securing When a new incoming call arrives at an userspace rxrpc socket on a new connection that has a security class set, the code currently pushes it onto the accept queue to hold a ref on it for the socket. This doesn't work, however, as recvmsg() pops it off, notices that it's in the SERVER_SECURING state and discards the ref. This means that the call runs out of refs too early and the kernel oopses. By contrast, a kernel rxrpc socket manually pre-charges the incoming call pool with calls that already have user call IDs assigned, so they are ref'd by the call tree on the socket. Change the mode of operation for userspace rxrpc server sockets to work like this too. Although this is a UAPI change, server sockets aren't currently functional. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- include/uapi/linux/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 4accfa7e266d..8f8dc7a937a4 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -51,11 +51,11 @@ enum rxrpc_cmsg_type { RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ - RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */ + RXRPC_CHARGE_ACCEPT = 14, /* s-: Charge the accept pool with a user call ID */ RXRPC__SUPPORTED }; -- cgit v1.2.3