From 40ce74d1b28d38e5debc14b5a6ddd9071ae2d310 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 5 Feb 2020 17:59:15 -0500 Subject: drm/amdkfd: add svm ioctl API Add svm (shared virtual memory) ioctl data structure and API definition. The svm ioctl API is designed to be extensible in the future. All operations are provided by a single IOCTL to preserve ioctl number space. The arguments structure ends with a variable size array of attributes that can be used to set or get one or multiple attributes. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 130 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index bf5e7d7846dd..247b57baa94f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -30,9 +30,10 @@ * - 1.1 - initial version * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties + * - 1.5 - Add SVM API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 4 +#define KFD_IOCTL_MINOR_VERSION 5 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -473,6 +474,129 @@ enum kfd_mmio_remap { KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, }; +/* Guarantee host access to memory */ +#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 +/* Fine grained coherency between all devices with access */ +#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002 +/* Use any GPU in same hive as preferred device */ +#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004 +/* GPUs only read, allows replication */ +#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008 +/* Allow execution on GPU */ +#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010 +/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */ +#define 
KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020 + +/** + * kfd_ioctl_svm_op - SVM ioctl operations + * + * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes + * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes + */ +enum kfd_ioctl_svm_op { + KFD_IOCTL_SVM_OP_SET_ATTR, + KFD_IOCTL_SVM_OP_GET_ATTR +}; + +/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations + * + * GPU IDs are used to specify GPUs as preferred and prefetch locations. + * Below definitions are used for system memory or for leaving the preferred + * location unspecified. + */ +enum kfd_ioctl_svm_location { + KFD_IOCTL_SVM_LOCATION_SYSMEM = 0, + KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff +}; + +/** + * kfd_ioctl_svm_attr_type - SVM attribute types + * + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for + * system memory + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for + * system memory. Setting this triggers an + * immediate prefetch (migration). + * @KFD_IOCTL_SVM_ATTR_ACCESS: + * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given + * by the attribute value + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see + * KFD_IOCTL_SVM_FLAG_...) + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear + * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity + * (log2 num pages) + */ +enum kfd_ioctl_svm_attr_type { + KFD_IOCTL_SVM_ATTR_PREFERRED_LOC, + KFD_IOCTL_SVM_ATTR_PREFETCH_LOC, + KFD_IOCTL_SVM_ATTR_ACCESS, + KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE, + KFD_IOCTL_SVM_ATTR_NO_ACCESS, + KFD_IOCTL_SVM_ATTR_SET_FLAGS, + KFD_IOCTL_SVM_ATTR_CLR_FLAGS, + KFD_IOCTL_SVM_ATTR_GRANULARITY +}; + +/** + * kfd_ioctl_svm_attribute - Attributes as pairs of type and value + * + * The meaning of the @value depends on the attribute type. 
+ * + * @type: attribute type (see enum @kfd_ioctl_svm_attr_type) + * @value: attribute value + */ +struct kfd_ioctl_svm_attribute { + __u32 type; + __u32 value; +}; + +/** + * kfd_ioctl_svm_args - Arguments for SVM ioctl + * + * @op specifies the operation to perform (see enum + * @kfd_ioctl_svm_op). @start_addr and @size are common for all + * operations. + * + * A variable number of attributes can be given in @attrs. + * @nattr specifies the number of attributes. New attributes can be + * added in the future without breaking the ABI. If unknown attributes + * are given, the function returns -EINVAL. + * + * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address + * range. It may overlap existing virtual address ranges. If it does, + * the existing ranges will be split such that the attribute changes + * only apply to the specified address range. + * + * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes + * over all memory in the given range and returns the result as the + * attribute value. If different pages have different preferred or + * prefetch locations, 0xffffffff will be returned for + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC respectively. For + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be + * aggregated by bitwise AND. The minimum migration granularity + * throughout the range will be returned for + * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * + * Querying of accessibility attributes works by initializing the + * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the + * GPUID being queried. Multiple attributes can be given to allow + * querying multiple GPUIDs. The ioctl function overwrites the + * attribute type to indicate the access for the specified GPU. + * + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for + * @KFD_IOCTL_SVM_OP_GET_ATTR. 
+ */ +struct kfd_ioctl_svm_args { + __u64 start_addr; + __u64 size; + __u32 op; + __u32 nattr; + /* Variable length array of attributes */ + struct kfd_ioctl_svm_attribute attrs[0]; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -573,7 +697,9 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_SMI_EVENTS \ AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) +#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x20 +#define AMDKFD_COMMAND_END 0x21 #endif -- cgit v1.2.3 From 0f7b5c44d4c53710993e4773bd6eaf171f1888e6 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 28 May 2020 18:27:05 -0500 Subject: drm/amdkfd: add ioctl to configure and query xnack retries Xnack retries are used for page fault recovery. Some AMD chip families support continuously retry while page table entries are invalid. The driver must handle the page fault interrupt and fill in a valid entry for the GPU to continue. This ioctl allows to enable/disable XNACK retries per KFD process. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 43 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 247b57baa94f..3cb5b5dd9f77 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args { struct kfd_ioctl_svm_attribute attrs[0]; }; +/** + * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode + * + * @xnack_enabled: [in/out] Whether to enable XNACK mode for this process + * + * @xnack_enabled indicates whether recoverable page faults should be + * enabled for the current process. 
0 means disabled, positive means + * enabled, negative means leave unchanged. If enabled, virtual address + * translations on GFXv9 and later AMD GPUs can return XNACK and retry + * the access until a valid PTE is available. This is used to implement + * device page faults. + * + * On output, @xnack_enabled returns the (new) current mode (0 or + * positive). Therefore, a negative input value can be used to query + * the current mode without changing it. + * + * The XNACK mode fundamentally changes the way SVM managed memory works + * in the driver, with subtle effects on application performance and + * functionality. + * + * Enabling XNACK mode requires shader programs to be compiled + * differently. Furthermore, not all GPUs support changing the mode + * per-process. Therefore changing the mode is only allowed while no + * user mode queues exist in the process. This ensures that no shader + * code is running that may be compiled for the wrong mode. And GPUs + * that cannot change to the requested mode will prevent the XNACK + * mode from occurring. All GPUs used by the process must be in the + * same XNACK mode. + * + * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM. + * Therefore those GPUs are not considered for the XNACK mode switch. 
+ * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_set_xnack_mode_args { + __s32 xnack_enabled; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args { #define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) +#define AMDKFD_IOC_SET_XNACK_MODE \ + AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x21 +#define AMDKFD_COMMAND_END 0x22 #endif -- cgit v1.2.3 From 7b229b13d78d112e2c5d4a60a3c6f602289959fa Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 10 Apr 2021 19:56:05 -0700 Subject: HID: hid-input: add mapping for emoji picker key HUTRR101 added a new usage code for a key that is supposed to invoke and dismiss an emoji picker widget to assist users to locate and enter emojis. This patch adds a new key definition KEY_EMOJI_PICKER and maps 0x0c/0x0d9 usage code to this new keycode. Additionally hid-debug is adjusted to recognize this new usage code as well. 
Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ee93428ced9a..225ec87d4f22 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -611,6 +611,7 @@ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ +#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- cgit v1.2.3 From 63c8af5687f6b1b70e9458cac1ffb25e86db1695 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 May 2021 08:48:06 +0900 Subject: block: uapi: fix comment about block device ioctl Fix the comment mentioning ioctl command range used for zoned block devices to reflect the range of commands actually implemented. 
Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210509234806.3000-1-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f44eb0a04afd..4c32e97dcdf0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ -- cgit v1.2.3 From 7ac592aa35a684ff1858fb9ec282886b9e3575ac Mon Sep 17 00:00:00 2001 From: Chris Hyser Date: Wed, 24 Mar 2021 17:40:15 -0400 Subject: sched: prctl() core-scheduling interface This patch provides support for setting and copying core scheduling 'task cookies' between threads (PID), processes (TGID), and process groups (PGID). The value of core scheduling isn't that tasks don't share a core, 'nosmt' can do that. The value lies in exploiting all the sharing opportunities that exist to recover possible lost performance and that requires a degree of flexibility in the API. From a security perspective (and there are others), the thread, process and process group distinction is an existent hierarchical categorization of tasks that reflects many of the security concerns about 'data sharing'. For example, protecting against cache-snooping by a thread that can just read the memory directly isn't all that useful. With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a mechanism to create and share cookies. CREATE/SHARE_TO specify a target pid with enum pidtype used to specify the scope of the targeted tasks. For example, PIDTYPE_TGID will share the cookie with the process and all of its threads as typically desired in a security scenario. 
API: prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie) prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL) prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL) prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL) where 'tgtpid/srcpid == 0' implies the current process and pidtype is kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}. For return values, EINVAL, ENOMEM are what they say. ESRCH means the tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission access to tgtpid/srcpid. ENODEV indicates your machine lacks SMT. [peterz: complete rewrite] Signed-off-by: Chris Hyser Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123309.039845339@infradead.org --- include/uapi/linux/prctl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 18a9f59dc067..967d9c55323d 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -259,4 +259,12 @@ struct prctl_mm_map { #define PR_PAC_SET_ENABLED_KEYS 60 #define PR_PAC_GET_ENABLED_KEYS 61 +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From ed5aecd3da2eabd8a6c9f5593df2c4f00985fca2 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 5 May 2021 11:18:54 +0200 Subject: tty: remove broken r3964 line discipline No one stepped up in the past two years since it was marked as BROKEN by commit c7084edc3f6d (tty: mark Siemens R3964 line discipline as BROKEN). 
Remove the line discipline for good. Three remarks: * we remove also the uapi header (as noone is able to use that interface anyway) * we do *not* remove the N_R3964 constant definition from tty.h, so it remains reserved. * in_interrupt() check is now removed from vt's con_put_char. Noone else calls tty_operations::put_char from interrupt context. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210505091928.22010-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/n_r3964.h | 99 -------------------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 include/uapi/linux/n_r3964.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/n_r3964.h b/include/uapi/linux/n_r3964.h deleted file mode 100644 index 6bbd18520f30..000000000000 --- a/include/uapi/linux/n_r3964.h +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ -/* r3964 linediscipline for linux - * - * ----------------------------------------------------------- - * Copyright by - * Philips Automation Projects - * Kassel (Germany) - * ----------------------------------------------------------- - * This software may be used and distributed according to the terms of - * the GNU General Public License, incorporated herein by reference. - * - * Author: - * L. Haag - * - * $Log: r3964.h,v $ - * Revision 1.4 2005/12/21 19:54:24 Kurt Huwig - * Fixed HZ usage on 2.6 kernels - * Removed unnecessary include - * - * Revision 1.3 2001/03/18 13:02:24 dwmw2 - * Fix timer usage, use spinlocks properly. - * - * Revision 1.2 2001/03/18 12:53:15 dwmw2 - * Merge changes in 2.4.2 - * - * Revision 1.1.1.1 1998/10/13 16:43:14 dwmw2 - * This'll screw the version control - * - * Revision 1.6 1998/09/30 00:40:38 dwmw2 - * Updated to use kernel's N_R3964 if available - * - * Revision 1.4 1998/04/02 20:29:44 lhaag - * select, blocking, ... 
- * - * Revision 1.3 1998/02/12 18:58:43 root - * fixed some memory leaks - * calculation of checksum characters - * - * Revision 1.2 1998/02/07 13:03:17 root - * ioctl read_telegram - * - * Revision 1.1 1998/02/06 19:19:43 root - * Initial revision - * - * - */ - -#ifndef _UAPI__LINUX_N_R3964_H__ -#define _UAPI__LINUX_N_R3964_H__ - -/* line disciplines for r3964 protocol */ - - -/* - * Ioctl-commands - */ - -#define R3964_ENABLE_SIGNALS 0x5301 -#define R3964_SETPRIORITY 0x5302 -#define R3964_USE_BCC 0x5303 -#define R3964_READ_TELEGRAM 0x5304 - -/* Options for R3964_SETPRIORITY */ -#define R3964_MASTER 0 -#define R3964_SLAVE 1 - -/* Options for R3964_ENABLE_SIGNALS */ -#define R3964_SIG_ACK 0x0001 -#define R3964_SIG_DATA 0x0002 -#define R3964_SIG_ALL 0x000f -#define R3964_SIG_NONE 0x0000 -#define R3964_USE_SIGIO 0x1000 - -/* - * r3964 operation states: - */ - -/* types for msg_id: */ -enum {R3964_MSG_ACK=1, R3964_MSG_DATA }; - -#define R3964_MAX_MSG_COUNT 32 - -/* error codes for client messages */ -#define R3964_OK 0 /* no error. */ -#define R3964_TX_FAIL -1 /* transmission error, block NOT sent */ -#define R3964_OVERFLOW -2 /* msg queue overflow */ - -/* the client gets this struct when calling read(fd,...): */ -struct r3964_client_message { - int msg_id; - int arg; - int error_code; -}; - -#define R3964_MTU 256 - - - -#endif /* _UAPI__LINUX_N_R3964_H__ */ -- cgit v1.2.3 From b7fb0916544de44ce099d9f3b6129c86b484de25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 13 May 2021 15:20:52 +0200 Subject: net: bridge: mcast: add ip4+ip6 mcast router timers to mdb netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have split the multicast router state into two, one for IPv4 and one for IPv6, also add individual timers to the mdb netlink router port dump. Leaving the old timer attribute for backwards compatibility. Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 13d59c51ef5b..6b56a7549531 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -627,6 +627,8 @@ enum { MDBA_ROUTER_PATTR_UNSPEC, MDBA_ROUTER_PATTR_TIMER, MDBA_ROUTER_PATTR_TYPE, + MDBA_ROUTER_PATTR_INET_TIMER, + MDBA_ROUTER_PATTR_INET6_TIMER, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) -- cgit v1.2.3 From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 17:28:31 -0500 Subject: signal: Deliver all of the siginfo perf data in _perf Don't abuse si_errno and deliver all of the perf data in _perf member of siginfo_t. Note: The data field in the perf data structures is a u64 to allow a pointer to be encoded without needing to implement a 32bit and 64bit version of the same structure. There already exist 32bit and 64bit versions of siginfo_t, and the 32bit version can not include a 64bit member as it only has 32bit alignment. So unsigned long is used in siginfo_t instead of a u64 as unsigned long can encode a pointer on all architectures linux supports. v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. 
Biederman" --- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/signalfd.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..7b14753b3d38 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 7e333042c7e3..e78dddf433fc 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,8 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 __pad3; - __u64 ssi_perf; + __u32 ssi_perf_type; + __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the -- cgit v1.2.3 From 922e3013046b79b444c87eda5baf43afae1326a8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 May 2021 12:52:43 -0500 Subject: signalfd: Remove SIL_PERF_EVENT fields from signalfd_siginfo With the addition of ssi_perf_data and ssi_perf_type struct signalfd_siginfo is dangerously close to running out of space. All that remains is just enough space for two additional 64bit fields. A practice of adding all possible siginfo_t fields into struct signalfd_siginfo can not be supported as adding the missing fields ssi_lower, ssi_upper, and ssi_pkey would require two 64bit fields and one 32bit field. In practice the fields ssi_perf_data and ssi_perf_type can never be used by signalfd as the signal that generates them always delivers them synchronously to the thread that triggers them. Therefore until someone actually needs the fields ssi_perf_data and ssi_perf_type in signalfd_siginfo remove them. 
This leaves a bit more room for future expansion. v1: https://lkml.kernel.org/r/20210503203814.25487-12-ebiederm@xmission.com v2: https://lkml.kernel.org/r/20210505141101.11519-12-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-5-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/signalfd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index e78dddf433fc..83429a05b698 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,6 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 ssi_perf_type; - __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the @@ -51,7 +49,7 @@ struct signalfd_siginfo { * comes out of a read(2) and we really don't want to have * a compat on read(2). */ - __u8 __pad[16]; + __u8 __pad[28]; }; -- cgit v1.2.3 From 79a7f8bdb159d9914b58740f3d31d602a6e4aca8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:03 -0700 Subject: bpf: Introduce bpf_sys_bpf() helper and program type. Add placeholders for bpf_sys_bpf() helper and new program type. Make sure to check that expected_attach_type is zero for future extensibility. Allow tracing helper functions to be used in this program type, since they will only execute from user context via bpf_prog_test_run. 
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-2-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ec6d85a81744..c92648f38144 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -937,6 +937,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ }; enum bpf_attach_type { @@ -4735,6 +4736,12 @@ union bpf_attr { * be zero-terminated except when **str_size** is 0. * * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4903,6 +4910,7 @@ union bpf_attr { FN(check_mtu), \ FN(for_each_map_elem), \ FN(snprintf), \ + FN(sys_bpf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 387544bfa291a22383d60b40f887360e2b931ec6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:10 -0700 Subject: bpf: Introduce fd_idx Typical program loading sequence involves creating bpf maps and applying map FDs into bpf instructions in various places in the bpf program. This job is done by libbpf that is using compiler generated ELF relocations to patch certain instruction after maps are created and BTFs are loaded. The goal of fd_idx is to allow bpf instructions to stay immutable after compilation. At load time the libbpf would still create maps as usual, but it wouldn't need to patch instructions. It would store map_fds into __u32 fd_array[] and would pass that pointer to sys_bpf(BPF_PROG_LOAD). 
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-9-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c92648f38144..de58a714ed36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1098,8 +1098,8 @@ enum bpf_link_type { /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD - * insn[0].imm: map fd + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx * insn[1].imm: 0 * insn[0].off: 0 * insn[1].off: 0 @@ -1107,15 +1107,19 @@ enum bpf_link_type { * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx * insn[1].imm: offset into value * insn[0].off: 0 * insn[1].off: 0 * ldimm64 rewrite: address of map[0]+offset * verifier type: PTR_TO_MAP_VALUE */ -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 @@ -1315,6 +1319,8 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; + __u32 :32; /* pad */ + __aligned_u64 fd_array; /* array of FDs */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 3d78417b60fba249cc555468cb72d96f5cde2964 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:11 -0700 Subject: bpf: Add bpf_btf_find_by_name_kind() helper. 
Add new helper: long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) Description Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. Return Returns btf_id and btf_obj_fd in lower and upper 32 bits. It will be used by loader program to find btf_id to attach the program to and to find btf_ids of ksyms. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-10-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index de58a714ed36..3cc07351c1cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4748,6 +4748,12 @@ union bpf_attr { * Execute bpf syscall with given arguments. * Return * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4917,6 +4923,7 @@ union bpf_attr { FN(for_each_map_elem), \ FN(snprintf), \ FN(sys_bpf), \ + FN(btf_find_by_name_kind), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3abea089246f76c1517b054ddb5946f3f1dbd2c0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:12 -0700 Subject: bpf: Add bpf_sys_close() helper. Add bpf_sys_close() helper to be used by the syscall/loader program to close intermediate FDs and other cleanup. Note this helper must never be allowed inside fdget/fdput bracketing. 
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-11-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3cc07351c1cf..4cd9a0181f27 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4754,6 +4754,12 @@ union bpf_attr { * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. * Return * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4924,6 +4930,7 @@ union bpf_attr { FN(snprintf), \ FN(sys_bpf), \ FN(btf_find_by_name_kind), \ + FN(sys_close), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 7cd60e43a6def40ecb75deb8decc677995970d0b Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:15 -0700 Subject: uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ Define AT_MINSIGSTKSZ in the generic uapi header. It is already used as generic ABI in glibc's generic elf.h, and this define will prevent future namespace conflicts. In particular, x86 is also using this generic definition. Signed-off-by: Chang S. 
Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210518200320.17239-2-chang.seok.bae@intel.com --- include/uapi/linux/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h index abe5f2b6581b..c7e502bf5a6f 100644 --- a/include/uapi/linux/auxvec.h +++ b/include/uapi/linux/auxvec.h @@ -33,5 +33,8 @@ #define AT_EXECFN 31 /* filename of program */ +#ifndef AT_MINSIGSTKSZ +#define AT_MINSIGSTKSZ 51 /* minimal stack size for signal delivery */ +#endif #endif /* _UAPI_LINUX_AUXVEC_H */ -- cgit v1.2.3 From 5d67f349590ddc94b6d4e25f19085728db9de697 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 May 2021 18:40:32 -0700 Subject: bpf: Add cmd alias BPF_PROG_RUN Add BPF_PROG_RUN command as an alias to BPF_PROG_TEST_RUN to better indicate the full range of use cases done by the command. Suggested-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210519014032.20908-1-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4cd9a0181f27..418b9b813d65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -837,6 +837,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, -- cgit v1.2.3 From 12ccb76280f8c0c07794fa68f83286b934981ca5 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 13 Apr 2021 11:40:17 +0200 Subject: media: lirc: remove out of date comment This file has been updated many times since 2010. 
Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index c45a4eaea667..9919f2062b14 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * lirc.h - linux infrared remote control header file - * last modified 2010/07/13 by Jarod Wilson */ #ifndef _LINUX_LIRC_H -- cgit v1.2.3 From 2f0968827a48a3b01a0cc9185abd41978d5ce918 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 29 Apr 2021 16:48:16 +0200 Subject: media: uapi: Move the MPEG-2 stateless control type out of staging Move the MPEG-2 stateless control types out of staging, and re-number it to avoid any confusion. Signed-off-by: Ezequiel Garcia Tested-by: Jernej Skrabec Reviewed-by: Jernej Skrabec Tested-by: Daniel Almeida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 311a01cc5775..d3bb18a3a51b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1807,6 +1807,10 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_FWHT_PARAMS = 0x0220, V4L2_CTRL_TYPE_VP8_FRAME = 0x0240, + + V4L2_CTRL_TYPE_MPEG2_QUANTISATION = 0x0250, + V4L2_CTRL_TYPE_MPEG2_SEQUENCE = 0x0251, + V4L2_CTRL_TYPE_MPEG2_PICTURE = 0x0252, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From f4815b399111d992c1118c708f464a847dfd29e2 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 29 Apr 2021 16:48:18 +0200 Subject: media: uapi: move MPEG-2 stateless controls out of staging Until now, the MPEG-2 V4L2 API was not exported as a public API, and only defined in a private media header (media/mpeg2-ctrls.h). 
After reviewing the MPEG-2 specification in detail, and reworking the controls so they match the MPEG-2 semantics properly, we can consider it ready. Signed-off-by: Ezequiel Garcia Tested-by: Jernej Skrabec Reviewed-by: Jernej Skrabec Tested-by: Daniel Almeida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 112 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 3 + 2 files changed, 115 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index d43bec5f1afd..f96bea19c991 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1862,6 +1862,118 @@ struct v4l2_ctrl_vp8_frame { __u64 flags; }; +/* Stateless MPEG-2 controls */ + +#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE 0x01 + +#define V4L2_CID_STATELESS_MPEG2_SEQUENCE (V4L2_CID_CODEC_STATELESS_BASE+220) +/** + * struct v4l2_ctrl_mpeg2_sequence - MPEG-2 sequence header + * + * All the members on this structure match the sequence header and sequence + * extension syntaxes as specified by the MPEG-2 specification. + * + * Fields horizontal_size, vertical_size and vbv_buffer_size are a + * combination of respective _value and extension syntax elements, + * as described in section 6.3.3 "Sequence header". + * + * @horizontal_size: combination of elements horizontal_size_value and + * horizontal_size_extension. + * @vertical_size: combination of elements vertical_size_value and + * vertical_size_extension. + * @vbv_buffer_size: combination of elements vbv_buffer_size_value and + * vbv_buffer_size_extension. + * @profile_and_level_indication: see MPEG-2 specification. + * @chroma_format: see MPEG-2 specification. + * @flags: see V4L2_MPEG2_SEQ_FLAG_{}. 
+ */ +struct v4l2_ctrl_mpeg2_sequence { + __u16 horizontal_size; + __u16 vertical_size; + __u32 vbv_buffer_size; + __u16 profile_and_level_indication; + __u8 chroma_format; + __u8 flags; +}; + +#define V4L2_MPEG2_PIC_CODING_TYPE_I 1 +#define V4L2_MPEG2_PIC_CODING_TYPE_P 2 +#define V4L2_MPEG2_PIC_CODING_TYPE_B 3 +#define V4L2_MPEG2_PIC_CODING_TYPE_D 4 + +#define V4L2_MPEG2_PIC_TOP_FIELD 0x1 +#define V4L2_MPEG2_PIC_BOTTOM_FIELD 0x2 +#define V4L2_MPEG2_PIC_FRAME 0x3 + +#define V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST 0x0001 +#define V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT 0x0002 +#define V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV 0x0004 +#define V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE 0x0008 +#define V4L2_MPEG2_PIC_FLAG_INTRA_VLC 0x0010 +#define V4L2_MPEG2_PIC_FLAG_ALT_SCAN 0x0020 +#define V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST 0x0040 +#define V4L2_MPEG2_PIC_FLAG_PROGRESSIVE 0x0080 + +#define V4L2_CID_STATELESS_MPEG2_PICTURE (V4L2_CID_CODEC_STATELESS_BASE+221) +/** + * struct v4l2_ctrl_mpeg2_picture - MPEG-2 picture header + * + * All the members on this structure match the picture header and picture + * coding extension syntaxes as specified by the MPEG-2 specification. + * + * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as + * reference for backward prediction. + * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as + * reference for forward prediction. These timestamp refers to the + * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns() + * to convert the struct timeval to a __u64. + * @flags: see V4L2_MPEG2_PIC_FLAG_{}. + * @f_code: see MPEG-2 specification. + * @picture_coding_type: see MPEG-2 specification. + * @picture_structure: see V4L2_MPEG2_PIC_{}_FIELD. + * @intra_dc_precision: see MPEG-2 specification. + * @reserved: padding field. Should be zeroed by applications. 
+ */ +struct v4l2_ctrl_mpeg2_picture { + __u64 backward_ref_ts; + __u64 forward_ref_ts; + __u32 flags; + __u8 f_code[2][2]; + __u8 picture_coding_type; + __u8 picture_structure; + __u8 intra_dc_precision; + __u8 reserved[5]; +}; + +#define V4L2_CID_STATELESS_MPEG2_QUANTISATION (V4L2_CID_CODEC_STATELESS_BASE+222) +/** + * struct v4l2_ctrl_mpeg2_quantisation - MPEG-2 quantisation + * + * Quantisation matrices as specified by section 6.3.7 + * "Quant matrix extension". + * + * @intra_quantiser_matrix: The quantisation matrix coefficients + * for intra-coded frames, in zigzag scanning order. It is relevant + * for both luma and chroma components, although it can be superseded + * by the chroma-specific matrix for non-4:2:0 YUV formats. + * @non_intra_quantiser_matrix: The quantisation matrix coefficients + * for non-intra-coded frames, in zigzag scanning order. It is relevant + * for both luma and chroma components, although it can be superseded + * by the chroma-specific matrix for non-4:2:0 YUV formats. + * @chroma_intra_quantiser_matrix: The quantisation matrix coefficients + * for the chominance component of intra-coded frames, in zigzag scanning + * order. Only relevant for 4:2:2 and 4:4:4 YUV formats. + * @chroma_non_intra_quantiser_matrix: The quantisation matrix coefficients + * for the chrominance component of non-intra-coded frames, in zigzag scanning + * order. Only relevant for 4:2:2 and 4:4:4 YUV formats. 
+ */ +struct v4l2_ctrl_mpeg2_quantisation { + __u8 intra_quantiser_matrix[64]; + __u8 non_intra_quantiser_matrix[64]; + __u8 chroma_intra_quantiser_matrix[64]; + __u8 chroma_non_intra_quantiser_matrix[64]; +}; + #define V4L2_CID_COLORIMETRY_CLASS_BASE (V4L2_CTRL_CLASS_COLORIMETRY | 0x900) #define V4L2_CID_COLORIMETRY_CLASS (V4L2_CTRL_CLASS_COLORIMETRY | 1) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index d3bb18a3a51b..9260791b8438 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1747,6 +1747,9 @@ struct v4l2_ext_control { struct v4l2_ctrl_h264_decode_params __user *p_h264_decode_params; struct v4l2_ctrl_fwht_params __user *p_fwht_params; struct v4l2_ctrl_vp8_frame __user *p_vp8_frame; + struct v4l2_ctrl_mpeg2_sequence __user *p_mpeg2_sequence; + struct v4l2_ctrl_mpeg2_picture __user *p_mpeg2_picture; + struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation; void __user *ptr; }; } __attribute__ ((packed)); -- cgit v1.2.3 From 3e87f192b405960c0fe83e0925bd0dadf4f8cf43 Mon Sep 17 00:00:00 2001 From: Denis Salopek Date: Tue, 11 May 2021 23:00:04 +0200 Subject: bpf: Add lookup_and_delete_elem support to hashtab Extend the existing bpf_map_lookup_and_delete_elem() functionality to hashtab map types, in addition to stacks and queues. Create a new hashtab bpf_map_ops function that does lookup and deletion of the element under the same bucket lock and add the created map_ops to bpf.h. 
Signed-off-by: Denis Salopek Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/4d18480a3e990ffbf14751ddef0325eed3be2966.1620763117.git.denis.salopek@sartura.hr --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 418b9b813d65..562adeac1d67 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -527,6 +527,15 @@ union bpf_iter_link_info { * Look up an element with the given *key* in the map referred to * by the file descriptor *fd*, and if found, delete the element. * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types * implement this command as a "pop" operation, deleting the top * element rather than one corresponding to *key*. @@ -536,6 +545,10 @@ union bpf_iter_link_info { * This command is only valid for the following map types: * * **BPF_MAP_TYPE_QUEUE** * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** * * Return * Returns zero on success. On error, -1 is returned and *errno* -- cgit v1.2.3 From e624d4ed4aa8cc3c69d1359b0aaea539203ed266 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 19 May 2021 17:07:45 +0800 Subject: xdp: Extend xdp_redirect_map with broadcast support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two flags BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS to extend xdp_redirect_map for broadcast support. 
With BPF_F_BROADCAST the packet will be broadcasted to all the interfaces in the map. with BPF_F_EXCLUDE_INGRESS the ingress interface will be excluded when do broadcasting. When getting the devices in dev hash map via dev_map_hash_get_next_key(), there is a possibility that we fall back to the first key when a device was removed. This will duplicate packets on some interfaces. So just walk the whole buckets to avoid this issue. For dev array map, we also walk the whole map to find valid interfaces. Function bpf_clear_redirect_map() was removed in commit ee75aef23afe ("bpf, xdp: Restructure redirect actions"). Add it back as we need to use ri->map again. With test topology: +-------------------+ +-------------------+ | Host A (i40e 10G) | ---------- | eno1(i40e 10G) | +-------------------+ | | | Host B | +-------------------+ | | | Host C (i40e 10G) | ---------- | eno2(i40e 10G) | +-------------------+ | | | +------+ | | veth0 -- | Peer | | | veth1 -- | | | | veth2 -- | NS | | | +------+ | +-------------------+ On Host A: # pktgen/pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -s 64 On Host B(Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 128G Memory): Use xdp_redirect_map and xdp_redirect_map_multi in samples/bpf for testing. All the veth peers in the NS have a XDP_DROP program loaded. The forward_map max_entries in xdp_redirect_map_multi is modify to 4. 
Testing the performance impact on the regular xdp_redirect path with and without patch (to check impact of additional check for broadcast mode): 5.12 rc4 | redirect_map i40e->i40e | 2.0M | 9.7M 5.12 rc4 | redirect_map i40e->veth | 1.7M | 11.8M 5.12 rc4 + patch | redirect_map i40e->i40e | 2.0M | 9.6M 5.12 rc4 + patch | redirect_map i40e->veth | 1.7M | 11.7M Testing the performance when cloning packets with the redirect_map_multi test, using a redirect map size of 4, filled with 1-3 devices: 5.12 rc4 + patch | redirect_map multi i40e->veth (x1) | 1.7M | 11.4M 5.12 rc4 + patch | redirect_map multi i40e->veth (x2) | 1.1M | 4.3M 5.12 rc4 + patch | redirect_map multi i40e->veth (x3) | 0.8M | 2.6M Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Acked-by: Martin KaFai Lau Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20210519090747.1655268-3-liuhangbin@gmail.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 562adeac1d67..2c1ba70abbf1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2555,8 +2555,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcasted to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when do broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. 
@@ -5122,6 +5126,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ -- cgit v1.2.3 From 7e97d274db920df479e222fed10e7b242f90ffb0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 17 May 2021 13:24:25 +0200 Subject: can: uapi: update CAN-FD frame description Since an early version of the CAN-FD specification the bit that defines a CAN-FD frame on the wire, has been renamed from Extended Data Length (EDL) to FD Frame (FDF). To avoid confusion, update the struct canfd_frame description in the UAPI headers accordingly. Link: https://lore.kernel.org/r/20210517113727.77597-1-mkl@pengutronix.de Suggested-by: Ayoub Kaanich Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index c7535352fef6..ac5d7a31671f 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -123,8 +123,8 @@ struct can_frame { /* * defined bits for canfd_frame.flags * - * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to - * be set in the CAN frame bitstream on the wire. The EDL bit switch turns + * The use of struct canfd_frame implies the FD Frame (FDF) bit to + * be set in the CAN frame bitstream on the wire. 
The FDF bit switch turns * the CAN controllers bitstream processor into the CAN FD mode which creates * two new options within the CAN FD frame specification: * -- cgit v1.2.3 From 02546884221279da2725e87e35348290470363d7 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 11 Apr 2017 15:43:43 +0200 Subject: can: uapi: introduce CANFD_FDF flag for mixed content in struct canfd_frame The struct can_frame and struct canfd_frame intentionally share the same layout to be able to write CAN frame content into a CAN FD frame structure. When this is done the former differentiation via CAN_MTU / CANFD_MTU is lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of using struct canfd_frame for mixed CAN/CAN FD content (dual use). N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. Link: https://lore.kernel.org/r/20170411134343.3089-1-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index ac5d7a31671f..90801ada2bbe 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -135,9 +135,18 @@ struct can_frame { * controller only the CANFD_BRS bit is relevant for real CAN controllers when * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make * sense for virtual CAN interfaces to test applications with echoed frames. + * + * The struct can_frame and struct canfd_frame intentionally share the same + * layout to be able to write CAN frame content into a CAN FD frame structure. + * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets + * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of + * using struct canfd_frame for mixed CAN / CAN FD content (dual use). + * N.B. 
the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of + * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. */ #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ +#define CANFD_FDF 0x04 /* mark CAN FD for dual use of struct canfd_frame */ /** * struct canfd_frame - CAN flexible data rate frame structure -- cgit v1.2.3 From fb1070d18edb37daf3979662975bc54625a19953 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Fri, 21 May 2021 01:58:43 -0700 Subject: KVM: X86: Use _BITUL() macro in UAPI headers Replace BIT() in KVM's UAPI header with _BITUL(). BIT() is not defined in the UAPI headers and its usage may cause userspace build errors. Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking") Signed-off-by: Joe Richey Message-Id: <20210521085849.37676-3-joerichey94@gmail.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3fd9a7e9d90c..79d9c44d1ad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -8,6 +8,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ +#include <linux/const.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/ioctl.h> @@ -1879,8 +1880,8 @@ struct kvm_hyperv_eventfd { * conversion after harvesting an entry. Also, it must not skip any * dirty bits, so that dirty bits are always harvested in sequence.
*/ -#define KVM_DIRTY_GFN_F_DIRTY BIT(0) -#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) #define KVM_DIRTY_GFN_F_MASK 0x3 /* -- cgit v1.2.3 From 133dc203d77dff617d9c4673973ef3859be2c476 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 4 May 2021 17:54:06 +0200 Subject: netfilter: nft_exthdr: Support SCTP chunks Chunks are SCTP header extensions similar in implementation to IPv6 extension headers or TCP options. Reusing exthdr expression to find and extract field values from them is therefore pretty straightforward. For now, this supports extracting data from chunks at a fixed offset (and length) only - chunks themselves are an extensible data structure; in order to make all fields available, a nested extension search is needed. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1fb4ca18ffbb..19715e2679d1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -813,11 +813,13 @@ enum nft_exthdr_flags { * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers * @NFT_EXTHDR_OP_TCP: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options + * @NFT_EXTHDR_OP_SCTP: match against sctp chunks */ enum nft_exthdr_op { NFT_EXTHDR_OP_IPV6, NFT_EXTHDR_OP_TCPOPT, NFT_EXTHDR_OP_IPV4, + NFT_EXTHDR_OP_SCTP, __NFT_EXTHDR_OP_MAX }; #define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) -- cgit v1.2.3 From dd8b477f9a3d8edb136207acb3652e1a34a661b7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 1 Jun 2021 11:33:59 +0200 Subject: mount: Support "nosymfollow" in new mount api Commit dab741e0e02b ("Add a "nosymfollow" mount option.") added support for the "nosymfollow" mount option allowing to block following 
symlinks when resolving paths. The mount option so far was only available in the old mount api. Make it available in the new mount api as well. Bonus is that it can be applied to a whole subtree not just a single mount. Cc: Christoph Hellwig Cc: Mattias Nissler Cc: Aleksa Sarai Cc: Al Viro Cc: Ross Zwisler Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index e6524ead2b7b..dd7a166fdf9c 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -120,6 +120,7 @@ enum fsconfig_command { #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ #define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ /* * mount_setattr() -- cgit v1.2.3 From e1d9a90a9bfdb0735062d3adb16b07314b4b7b01 Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:35 +0530 Subject: net: ethernet: rmnet: Support for ingress MAPv5 checksum offload Adding support for processing of MAPv5 downlink packets. It involves parsing the Mapv5 packet and checking the csum header to know whether the hardware has validated the checksum and is valid or not. Based on the checksum valid bit the corresponding stats are incremented and skb->ip_summed is marked either CHECKSUM_UNNECESSARY or left as CHECKSUM_NONE to let network stack revalidate the checksum and update the respective snmp stats. Current MAPV1 header has been modified, the reserved field in the Mapv1 header is now used for next header indication. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S.
Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cd5b382a4138..1f753dcd85e1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1236,6 +1236,7 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) enum { IFLA_RMNET_UNSPEC, -- cgit v1.2.3 From b6e5d27e32ef6089d316ce7e1ecaf595584d4b84 Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:36 +0530 Subject: net: ethernet: rmnet: Add support for MAPv5 egress packets Adding support for MAPv5 egress packets. This involves adding the MAPv5 header and setting the csum_valid_required in the checksum header to request HW compute the checksum. Corresponding stats are incremented based on whether the checksum is computed in software or HW. New stat has been added which represents the count of packets whose checksum is calculated by the HW. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1f753dcd85e1..a5a7f0e64865 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1237,6 +1237,7 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, -- cgit v1.2.3 From d170ebb00472268410dce80ae4834c98e79315da Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 21 May 2021 10:45:44 +0200 Subject: media: uapi/linux/cec-funcs.h: set delay to 1 if unused If the audio_out_delay value is unused, then set it to 1, not 0. The value 0 is reserved, and 1 is a much safer value since it translates to a delay of (1 - 1) * 2 = 0 ms. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec-funcs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index 37590027b604..c3baaea0b8ef 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -1665,7 +1665,7 @@ static inline void cec_ops_report_current_latency(const struct cec_msg *msg, if (*audio_out_compensated == 3 && msg->len >= 7) *audio_out_delay = msg->msg[6]; else - *audio_out_delay = 0; + *audio_out_delay = 1; } static inline void cec_msg_request_current_latency(struct cec_msg *msg, -- cgit v1.2.3 From ce67eaca95f8ab5c6aae41a10adfe9a6e8efa58c Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Fri, 21 May 2021 10:58:46 +0200 Subject: media: vicodec: Use _BITUL() macro in UAPI headers Replace BIT() in v4l2's UAPI header with _BITUL(). BIT() is not defined in the UAPI headers and its usage may cause userspace build errors.
Fixes: 206bc0f6fb94 ("media: vicodec: mark the stateless FWHT API as stable") Signed-off-by: Joe Richey Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index f96bea19c991..fdf97a6d7d18 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -50,6 +50,7 @@ #ifndef __LINUX_V4L2_CONTROLS_H #define __LINUX_V4L2_CONTROLS_H +#include #include /* Control classes */ @@ -1602,30 +1603,30 @@ struct v4l2_ctrl_h264_decode_params { #define V4L2_FWHT_VERSION 3 /* Set if this is an interlaced format */ -#define V4L2_FWHT_FL_IS_INTERLACED BIT(0) +#define V4L2_FWHT_FL_IS_INTERLACED _BITUL(0) /* Set if this is a bottom-first (NTSC) interlaced format */ -#define V4L2_FWHT_FL_IS_BOTTOM_FIRST BIT(1) +#define V4L2_FWHT_FL_IS_BOTTOM_FIRST _BITUL(1) /* Set if each 'frame' contains just one field */ -#define V4L2_FWHT_FL_IS_ALTERNATE BIT(2) +#define V4L2_FWHT_FL_IS_ALTERNATE _BITUL(2) /* * If V4L2_FWHT_FL_IS_ALTERNATE was set, then this is set if this * 'frame' is the bottom field, else it is the top field. 
*/ -#define V4L2_FWHT_FL_IS_BOTTOM_FIELD BIT(3) +#define V4L2_FWHT_FL_IS_BOTTOM_FIELD _BITUL(3) /* Set if the Y' plane is uncompressed */ -#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED BIT(4) +#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED _BITUL(4) /* Set if the Cb plane is uncompressed */ -#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED BIT(5) +#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED _BITUL(5) /* Set if the Cr plane is uncompressed */ -#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED BIT(6) +#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED _BITUL(6) /* Set if the chroma plane is full height, if cleared it is half height */ -#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT BIT(7) +#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT _BITUL(7) /* Set if the chroma plane is full width, if cleared it is half width */ -#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH BIT(8) +#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH _BITUL(8) /* Set if the alpha plane is uncompressed */ -#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED BIT(9) +#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED _BITUL(9) /* Set if this is an I Frame */ -#define V4L2_FWHT_FL_I_FRAME BIT(10) +#define V4L2_FWHT_FL_I_FRAME _BITUL(10) /* A 4-values flag - the number of components - 1 */ #define V4L2_FWHT_FL_COMPONENTS_NUM_MSK GENMASK(18, 16) -- cgit v1.2.3 From 4677efc486e1872f62d4632c50f7183f82296fa6 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:19 +0300 Subject: devlink: Introduce rate object Allow registering rate object for devlink ports with dedicated devlink_rate_leaf_{create|destroy}() API. Implement new netlink DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info. Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for notifications when creating/deleting leaf rate object. Rate API is intended to be used for rate limiting of individual devlink ports (leafs) and their aggregates (nodes). 
Example: $ devlink port show pci/0000:03:00.0/0 pci/0000:03:00.0/1 $ devlink port function rate show pci/0000:03:00.0/0: type leaf pci/0000:03:00.0/1: type leaf Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f6008b2fa60f..0c27b45c47db 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -126,6 +126,11 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_TEST, + DEVLINK_CMD_RATE_GET, /* can dump */ + DEVLINK_CMD_RATE_SET, + DEVLINK_CMD_RATE_NEW, + DEVLINK_CMD_RATE_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -206,6 +211,10 @@ enum devlink_port_flavour { */ }; +enum devlink_rate_type { + DEVLINK_RATE_TYPE_LEAF, +}; + enum devlink_param_cmode { DEVLINK_PARAM_CMODE_RUNTIME, DEVLINK_PARAM_CMODE_DRIVERINIT, @@ -534,6 +543,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ + + DEVLINK_ATTR_RATE_TYPE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 1897db2ec3109eb1dd07b357c95c5e03d54e41b9 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:22 +0300 Subject: devlink: Allow setting tx rate for devlink rate leaf objects Implement support for DEVLINK_CMD_RATE_SET command with new attributes DEVLINK_ATTR_RATE_TX_{SHARE|MAX} that are used to set devlink rate shared/max tx rate values. Extend devlink ops with new callbacks rate_leaf_tx_{share|max}_set() to allow supporting drivers to implement rate control through devlink. New attributes are optional. Driver implementations are allowed to support either or both of them. 
Shared rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_share 10mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_share 10mbit Max rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_max 100mbit Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0c27b45c47db..ae94cd2a1078 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -545,6 +545,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ DEVLINK_ATTR_RATE_TYPE, /* u16 */ + DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ + DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a8ecb93ef03de4c59fb6289f99bc9616a852c917 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:25 +0300 Subject: devlink: Introduce rate nodes Implement support for DEVLINK_CMD_RATE_{NEW|DEL} commands that are used to create and delete devlink rate nodes. Add new attribute DEVLINK_ATTR_RATE_NODE_NAME that specify node name string. The node name is an alphanumeric identifier. No valid node name can be a devlink port index, eg. decimal number. Extend devlink ops with new callbacks rate_node_{new|del}() and rate_node_tx_{share|max}_set() to allow supporting drivers to implement ports rate grouping and setting tx rate of rate nodes through devlink. 
Expose devlink_rate_nodes_destroy() function to allow a vendor driver to do proper cleanup of internally allocated resources for the nodes if the driver goes down or due to any other reason which requires nodes to be destroyed. Disallow moving device from switchdev to legacy mode if any node exists on that device. User must explicitly delete nodes before switching mode. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate set netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit Add + set command can be combined: $ devlink port function rate add netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node tx_share 10mbit tx_max 100mbit $ devlink port function rate del netdevsim/netdevsim10/group1 Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae94cd2a1078..7e15853b77fe 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -213,6 +213,7 @@ enum devlink_port_flavour { enum devlink_rate_type { DEVLINK_RATE_TYPE_LEAF, + DEVLINK_RATE_TYPE_NODE, }; enum devlink_param_cmode { @@ -547,6 +548,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TYPE, /* u16 */ DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ + DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From d7555984507822458b32a6405881038241d140be Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:28 +0300 Subject: devlink: Allow setting parent node of rate objects Refactor DEVLINK_CMD_RATE_{GET|SET} command handlers to support 
setting a node as a parent for another rate object (leaf or node) by means of new attribute DEVLINK_ATTR_RATE_PARENT_NODE_NAME. Extend devlink ops with new callbacks rate_{leaf|node}_parent_set() to set node as a parent for rate object to allow supporting drivers to implement rate grouping through devlink. Driver implementations are allowed to support leafs or node children only. Invoking callback with NULL as parent should be treated by the driver as unset parent action. Extend rate object struct with reference counter to disallow deleting a node with any child pointing to it. User should unset parent for the child explicitly. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate add netdevsim/netdevsim10/group2 $ devlink port function rate set netdevsim/netdevsim10/group1 parent group2 $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node parent group2 $ devlink port function rate set netdevsim/netdevsim10/group1 noparent Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7e15853b77fe..32f53a0069d6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -549,6 +549,7 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From a83d958504734f78f42b1e3392d93816297e790a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 3 Jun 2021 21:20:26 +0200 Subject: Bluetooth: Fix VIRTIO_ID_BT assigned number It turned out that the VIRTIO_ID_* are not assigned in the virtio_ids.h file in the upstream kernel. Picking the next free one was wrong and there is a process that has been followed now. See https://github.com/oasis-tcs/virtio-spec/issues/108 for details. 
Fixes: afd2daa26c7a ("Bluetooth: Add support for virtio transport driver") Signed-off-by: Marcel Holtmann Signed-off-by: Luiz Augusto von Dentz --- include/uapi/linux/virtio_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index f0c35ce8628c..4fe842c3a3a9 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -54,7 +54,7 @@ #define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_BT 28 /* virtio bluetooth */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From e32ea44c7ae476f4c90e35ab0a29dc8ff082bc11 Mon Sep 17 00:00:00 2001 From: Andreas Roeseler Date: Thu, 3 Jun 2021 16:22:11 -0500 Subject: icmp: fix lib conflict with trinity Including <linux/in.h> and <netinet/in.h> in the dependencies breaks compilation of trinity due to multiple definitions. <linux/in.h> is only used in <linux/icmp.h> to provide the definition of the struct in_addr, but this can be substituted out by using the datatype __be32. Signed-off-by: Andreas Roeseler Signed-off-by: David S. 
Miller --- include/uapi/linux/icmp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index c1da8244c5e1..163c0998aec9 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -154,7 +153,7 @@ struct icmp_ext_echo_iio { struct { struct icmp_ext_echo_ctype3_hdr ctype3_hdr; union { - struct in_addr ipv4_addr; + __be32 ipv4_addr; struct in6_addr ipv6_addr; } ip_addr; } addr; -- cgit v1.2.3 From 819fbd3d8ef36c09576c2a0ffea503f5c46e9177 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Jun 2021 11:31:30 +0200 Subject: media: dvb header files: move some headers to staging The audio, video and OSD APIs are used upstream only by the av7110 driver, which was moved to staging. So, move the corresponding header files to it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/audio.h | 101 ------------------- include/uapi/linux/dvb/osd.h | 181 --------------------------------- include/uapi/linux/dvb/video.h | 220 ----------------------------------------- 3 files changed, 502 deletions(-) delete mode 100644 include/uapi/linux/dvb/audio.h delete mode 100644 include/uapi/linux/dvb/osd.h delete mode 100644 include/uapi/linux/dvb/video.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/audio.h b/include/uapi/linux/dvb/audio.h deleted file mode 100644 index 2f869da69171..000000000000 --- a/include/uapi/linux/dvb/audio.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ -/* - * audio.h - DEPRECATED MPEG-TS audio decoder API - * - * NOTE: should not be used on future drivers - * - * Copyright (C) 2000 Ralph Metzler - * & Marcus Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Lesser 
Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ - -#ifndef _DVBAUDIO_H_ -#define _DVBAUDIO_H_ - -#include - -typedef enum { - AUDIO_SOURCE_DEMUX, /* Select the demux as the main source */ - AUDIO_SOURCE_MEMORY /* Select internal memory as the main source */ -} audio_stream_source_t; - - -typedef enum { - AUDIO_STOPPED, /* Device is stopped */ - AUDIO_PLAYING, /* Device is currently playing */ - AUDIO_PAUSED /* Device is paused */ -} audio_play_state_t; - - -typedef enum { - AUDIO_STEREO, - AUDIO_MONO_LEFT, - AUDIO_MONO_RIGHT, - AUDIO_MONO, - AUDIO_STEREO_SWAPPED -} audio_channel_select_t; - - -typedef struct audio_mixer { - unsigned int volume_left; - unsigned int volume_right; - /* what else do we need? bass, pass-through, ... */ -} audio_mixer_t; - - -typedef struct audio_status { - int AV_sync_state; /* sync audio and video? 
*/ - int mute_state; /* audio is muted */ - audio_play_state_t play_state; /* current playback state */ - audio_stream_source_t stream_source; /* current stream source */ - audio_channel_select_t channel_select; /* currently selected channel */ - int bypass_mode; /* pass on audio data to */ - audio_mixer_t mixer_state; /* current mixer state */ -} audio_status_t; /* separate decoder hardware */ - - -/* for GET_CAPABILITIES and SET_FORMAT, the latter should only set one bit */ -#define AUDIO_CAP_DTS 1 -#define AUDIO_CAP_LPCM 2 -#define AUDIO_CAP_MP1 4 -#define AUDIO_CAP_MP2 8 -#define AUDIO_CAP_MP3 16 -#define AUDIO_CAP_AAC 32 -#define AUDIO_CAP_OGG 64 -#define AUDIO_CAP_SDDS 128 -#define AUDIO_CAP_AC3 256 - -#define AUDIO_STOP _IO('o', 1) -#define AUDIO_PLAY _IO('o', 2) -#define AUDIO_PAUSE _IO('o', 3) -#define AUDIO_CONTINUE _IO('o', 4) -#define AUDIO_SELECT_SOURCE _IO('o', 5) -#define AUDIO_SET_MUTE _IO('o', 6) -#define AUDIO_SET_AV_SYNC _IO('o', 7) -#define AUDIO_SET_BYPASS_MODE _IO('o', 8) -#define AUDIO_CHANNEL_SELECT _IO('o', 9) -#define AUDIO_GET_STATUS _IOR('o', 10, audio_status_t) - -#define AUDIO_GET_CAPABILITIES _IOR('o', 11, unsigned int) -#define AUDIO_CLEAR_BUFFER _IO('o', 12) -#define AUDIO_SET_ID _IO('o', 13) -#define AUDIO_SET_MIXER _IOW('o', 14, audio_mixer_t) -#define AUDIO_SET_STREAMTYPE _IO('o', 15) -#define AUDIO_BILINGUAL_CHANNEL_SELECT _IO('o', 20) - -#endif /* _DVBAUDIO_H_ */ diff --git a/include/uapi/linux/dvb/osd.h b/include/uapi/linux/dvb/osd.h deleted file mode 100644 index 858997c74043..000000000000 --- a/include/uapi/linux/dvb/osd.h +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ -/* - * osd.h - DEPRECATED On Screen Display API - * - * NOTE: should not be used on future drivers - * - * Copyright (C) 2001 Ralph Metzler - * & Marcus Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the 
GNU General Lesser Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ - -#ifndef _DVBOSD_H_ -#define _DVBOSD_H_ - -#include - -typedef enum { - /* All functions return -2 on "not open" */ - OSD_Close = 1, /* () */ - /* - * Disables OSD and releases the buffers - * returns 0 on success - */ - OSD_Open, /* (x0,y0,x1,y1,BitPerPixel[2/4/8](color&0x0F),mix[0..15](color&0xF0)) */ - /* - * Opens OSD with this size and bit depth - * returns 0 on success, -1 on DRAM allocation error, -2 on "already open" - */ - OSD_Show, /* () */ - /* - * enables OSD mode - * returns 0 on success - */ - OSD_Hide, /* () */ - /* - * disables OSD mode - * returns 0 on success - */ - OSD_Clear, /* () */ - /* - * Sets all pixel to color 0 - * returns 0 on success - */ - OSD_Fill, /* (color) */ - /* - * Sets all pixel to color - * returns 0 on success - */ - OSD_SetColor, /* (color,R{x0},G{y0},B{x1},opacity{y1}) */ - /* - * set palette entry to , and apply - * R,G,B: 0..255 - * R=Red, G=Green, B=Blue - * opacity=0: pixel opacity 0% (only video pixel shows) - * opacity=1..254: pixel opacity as specified in header - * opacity=255: pixel opacity 100% (only OSD pixel shows) - * returns 0 on success, -1 on error - */ - OSD_SetPalette, /* (firstcolor{color},lastcolor{x0},data) */ - /* - * Set a number of entries in the palette - * sets the entries "firstcolor" through "lastcolor" from the array "data" - * data has 4 byte for each 
color: - * R,G,B, and a opacity value: 0->transparent, 1..254->mix, 255->pixel - */ - OSD_SetTrans, /* (transparency{color}) */ - /* - * Sets transparency of mixed pixel (0..15) - * returns 0 on success - */ - OSD_SetPixel, /* (x0,y0,color) */ - /* - * sets pixel , to color number - * returns 0 on success, -1 on error - */ - OSD_GetPixel, /* (x0,y0) */ - /* returns color number of pixel ,, or -1 */ - OSD_SetRow, /* (x0,y0,x1,data) */ - /* - * fills pixels x0,y through x1,y with the content of data[] - * returns 0 on success, -1 on clipping all pixel (no pixel drawn) - */ - OSD_SetBlock, /* (x0,y0,x1,y1,increment{color},data) */ - /* - * fills pixels x0,y0 through x1,y1 with the content of data[] - * inc contains the width of one line in the data block, - * inc<=0 uses blockwidth as linewidth - * returns 0 on success, -1 on clipping all pixel - */ - OSD_FillRow, /* (x0,y0,x1,color) */ - /* - * fills pixels x0,y through x1,y with the color - * returns 0 on success, -1 on clipping all pixel - */ - OSD_FillBlock, /* (x0,y0,x1,y1,color) */ - /* - * fills pixels x0,y0 through x1,y1 with the color - * returns 0 on success, -1 on clipping all pixel - */ - OSD_Line, /* (x0,y0,x1,y1,color) */ - /* - * draw a line from x0,y0 to x1,y1 with the color - * returns 0 on success - */ - OSD_Query, /* (x0,y0,x1,y1,xasp{color}}), yasp=11 */ - /* - * fills parameters with the picture dimensions and the pixel aspect ratio - * returns 0 on success - */ - OSD_Test, /* () */ - /* - * draws a test picture. 
for debugging purposes only - * returns 0 on success - * TODO: remove "test" in final version - */ - OSD_Text, /* (x0,y0,size,color,text) */ - OSD_SetWindow, /* (x0) set window with number 0 - * & Ralph Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ - -#ifndef _UAPI_DVBVIDEO_H_ -#define _UAPI_DVBVIDEO_H_ - -#include -#ifndef __KERNEL__ -#include -#endif - -typedef enum { - VIDEO_FORMAT_4_3, /* Select 4:3 format */ - VIDEO_FORMAT_16_9, /* Select 16:9 format. 
*/ - VIDEO_FORMAT_221_1 /* 2.21:1 */ -} video_format_t; - - -typedef enum { - VIDEO_PAN_SCAN, /* use pan and scan format */ - VIDEO_LETTER_BOX, /* use letterbox format */ - VIDEO_CENTER_CUT_OUT /* use center cut out format */ -} video_displayformat_t; - -typedef struct { - int w; - int h; - video_format_t aspect_ratio; -} video_size_t; - -typedef enum { - VIDEO_SOURCE_DEMUX, /* Select the demux as the main source */ - VIDEO_SOURCE_MEMORY /* If this source is selected, the stream - comes from the user through the write - system call */ -} video_stream_source_t; - - -typedef enum { - VIDEO_STOPPED, /* Video is stopped */ - VIDEO_PLAYING, /* Video is currently playing */ - VIDEO_FREEZED /* Video is freezed */ -} video_play_state_t; - - -/* Decoder commands */ -#define VIDEO_CMD_PLAY (0) -#define VIDEO_CMD_STOP (1) -#define VIDEO_CMD_FREEZE (2) -#define VIDEO_CMD_CONTINUE (3) - -/* Flags for VIDEO_CMD_FREEZE */ -#define VIDEO_CMD_FREEZE_TO_BLACK (1 << 0) - -/* Flags for VIDEO_CMD_STOP */ -#define VIDEO_CMD_STOP_TO_BLACK (1 << 0) -#define VIDEO_CMD_STOP_IMMEDIATELY (1 << 1) - -/* Play input formats: */ -/* The decoder has no special format requirements */ -#define VIDEO_PLAY_FMT_NONE (0) -/* The decoder requires full GOPs */ -#define VIDEO_PLAY_FMT_GOP (1) - -/* The structure must be zeroed before use by the application - This ensures it can be extended safely in the future. */ -struct video_command { - __u32 cmd; - __u32 flags; - union { - struct { - __u64 pts; - } stop; - - struct { - /* 0 or 1000 specifies normal speed, - 1 specifies forward single stepping, - -1 specifies backward single stepping, - >1: playback at speed/1000 of the normal speed, - <-1: reverse playback at (-speed/1000) of the normal speed. */ - __s32 speed; - __u32 format; - } play; - - struct { - __u32 data[16]; - } raw; - }; -}; - -/* FIELD_UNKNOWN can be used if the hardware does not know whether - the Vsync is for an odd, even or progressive (i.e. non-interlaced) - field. 
*/ -#define VIDEO_VSYNC_FIELD_UNKNOWN (0) -#define VIDEO_VSYNC_FIELD_ODD (1) -#define VIDEO_VSYNC_FIELD_EVEN (2) -#define VIDEO_VSYNC_FIELD_PROGRESSIVE (3) - -struct video_event { - __s32 type; -#define VIDEO_EVENT_SIZE_CHANGED 1 -#define VIDEO_EVENT_FRAME_RATE_CHANGED 2 -#define VIDEO_EVENT_DECODER_STOPPED 3 -#define VIDEO_EVENT_VSYNC 4 - /* unused, make sure to use atomic time for y2038 if it ever gets used */ - long timestamp; - union { - video_size_t size; - unsigned int frame_rate; /* in frames per 1000sec */ - unsigned char vsync_field; /* unknown/odd/even/progressive */ - } u; -}; - - -struct video_status { - int video_blank; /* blank video on freeze? */ - video_play_state_t play_state; /* current state of playback */ - video_stream_source_t stream_source; /* current source (demux/memory) */ - video_format_t video_format; /* current aspect ratio of stream*/ - video_displayformat_t display_format;/* selected cropping mode */ -}; - - -struct video_still_picture { - char __user *iFrame; /* pointer to a single iframe in memory */ - __s32 size; -}; - - -typedef __u16 video_attributes_t; -/* bits: descr. */ -/* 15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */ -/* 13-12 TV system (0=525/60, 1=625/50) */ -/* 11-10 Aspect ratio (0=4:3, 3=16:9) */ -/* 9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */ -/* 7 line 21-1 data present in GOP (1=yes, 0=no) */ -/* 6 line 21-2 data present in GOP (1=yes, 0=no) */ -/* 5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */ -/* 2 source letterboxed (1=yes, 0=no) */ -/* 0 film/camera mode (0= - *camera, 1=film (625/50 only)) */ - - -/* bit definitions for capabilities: */ -/* can the hardware decode MPEG1 and/or MPEG2? */ -#define VIDEO_CAP_MPEG1 1 -#define VIDEO_CAP_MPEG2 2 -/* can you send a system and/or program stream to video device? 
- (you still have to open the video and the audio device but only - send the stream to the video device) */ -#define VIDEO_CAP_SYS 4 -#define VIDEO_CAP_PROG 8 -/* can the driver also handle SPU, NAVI and CSS encoded data? - (CSS API is not present yet) */ -#define VIDEO_CAP_SPU 16 -#define VIDEO_CAP_NAVI 32 -#define VIDEO_CAP_CSS 64 - - -#define VIDEO_STOP _IO('o', 21) -#define VIDEO_PLAY _IO('o', 22) -#define VIDEO_FREEZE _IO('o', 23) -#define VIDEO_CONTINUE _IO('o', 24) -#define VIDEO_SELECT_SOURCE _IO('o', 25) -#define VIDEO_SET_BLANK _IO('o', 26) -#define VIDEO_GET_STATUS _IOR('o', 27, struct video_status) -#define VIDEO_GET_EVENT _IOR('o', 28, struct video_event) -#define VIDEO_SET_DISPLAY_FORMAT _IO('o', 29) -#define VIDEO_STILLPICTURE _IOW('o', 30, struct video_still_picture) -#define VIDEO_FAST_FORWARD _IO('o', 31) -#define VIDEO_SLOWMOTION _IO('o', 32) -#define VIDEO_GET_CAPABILITIES _IOR('o', 33, unsigned int) -#define VIDEO_CLEAR_BUFFER _IO('o', 34) -#define VIDEO_SET_STREAMTYPE _IO('o', 36) -#define VIDEO_SET_FORMAT _IO('o', 37) -#define VIDEO_GET_SIZE _IOR('o', 55, video_size_t) - -/** - * VIDEO_GET_PTS - * - * Read the 33 bit presentation time stamp as defined - * in ITU T-REC-H.222.0 / ISO/IEC 13818-1. - * - * The PTS should belong to the currently played - * frame if possible, but may also be a value close to it - * like the PTS of the last decoded frame or the last PTS - * extracted by the PES parser. 
- */ -#define VIDEO_GET_PTS _IOR('o', 57, __u64) - -/* Read the number of displayed frames since the decoder was started */ -#define VIDEO_GET_FRAME_COUNT _IOR('o', 58, __u64) - -#define VIDEO_COMMAND _IOWR('o', 59, struct video_command) -#define VIDEO_TRY_COMMAND _IOWR('o', 60, struct video_command) - -#endif /* _UAPI_DVBVIDEO_H_ */ -- cgit v1.2.3 From 603e4922f1c81fc2ed3a87b4f91a8d3aafc7e093 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 May 2021 10:25:26 +0300 Subject: remove the raw driver The raw driver used to provide direct unbuffered access to block devices before O_DIRECT was invented. It has been obsolete for more than a decade. Acked-by: Greg Kroah-Hartman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/Pine.LNX.4.64.0703180754060.6605@CPE00045a9c397f-CM001225dbafb6/ Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210531072526.97052-1-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/raw.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/uapi/linux/raw.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raw.h b/include/uapi/linux/raw.h deleted file mode 100644 index 47874919d0b9..000000000000 --- a/include/uapi/linux/raw.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_RAW_H -#define __LINUX_RAW_H - -#include - -#define RAW_SETBIND _IO( 0xac, 0 ) -#define RAW_GETBIND _IO( 0xac, 1 ) - -struct raw_config_request -{ - int raw_minor; - __u64 block_major; - __u64 block_minor; -}; - -#endif /* __LINUX_RAW_H */ -- cgit v1.2.3 From e2cf17d3774c323ef6dab6e9f7c0cfc5e742afd9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Jun 2021 12:27:07 +0200 Subject: netfilter: add new hook nfnl subsystem This nfnl subsystem allows to dump the list of all active netfilter hooks, e.g. defrag, conntrack, nf/ip/arp/ip6tables and so on. 
This helps to see what kind of features are currently enabled in the network stack. Sample output from nft tool using this infra: $ nft list hook ip input family ip hook input { +0000000010 nft_do_chain_inet [nf_tables] # nft table firewalld INPUT +0000000100 nf_nat_ipv4_local_in [nf_nat] +2147483647 ipv4_confirm [nf_conntrack] } Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink.h | 3 +- include/uapi/linux/netfilter/nfnetlink_hook.h | 55 +++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/netfilter/nfnetlink_hook.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 5bc960f220b3..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -60,7 +60,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTHELPER 9 #define NFNL_SUBSYS_NFTABLES 10 #define NFNL_SUBSYS_NFT_COMPAT 11 -#define NFNL_SUBSYS_COUNT 12 +#define NFNL_SUBSYS_HOOK 12 +#define NFNL_SUBSYS_COUNT 13 /* Reserved control nfnetlink messages */ #define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h new file mode 100644 index 000000000000..912ec60b26b0 --- /dev/null +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _NFNL_HOOK_H_ +#define _NFNL_HOOK_H_ + +enum nfnl_hook_msg_types { + NFNL_MSG_HOOK_GET, + NFNL_MSG_HOOK_MAX, +}; + +/** + * enum nfnl_hook_attributes - netfilter hook netlink attributes + * + * @NFNLA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFNLA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFNLA_HOOK_DEV: netdevice name (NLA_STRING) + * @NFNLA_HOOK_FUNCTION_NAME: hook function name (NLA_STRING) + * @NFNLA_HOOK_MODULE_NAME: kernel module that 
registered this hook (NLA_STRING) + * @NFNLA_HOOK_CHAIN_INFO: basechain hook metadata (NLA_NESTED) + */ +enum nfnl_hook_attributes { + NFNLA_HOOK_UNSPEC, + NFNLA_HOOK_HOOKNUM, + NFNLA_HOOK_PRIORITY, + NFNLA_HOOK_DEV, + NFNLA_HOOK_FUNCTION_NAME, + NFNLA_HOOK_MODULE_NAME, + NFNLA_HOOK_CHAIN_INFO, + __NFNLA_HOOK_MAX +}; +#define NFNLA_HOOK_MAX (__NFNLA_HOOK_MAX - 1) + +/** + * enum nfnl_hook_chain_info_attributes - chain description + * + * NFNLA_HOOK_INFO_DESC: nft chain and table name (enum nft_table_attributes) (NLA_NESTED) + * NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + */ +enum nfnl_hook_chain_info_attributes { + NFNLA_HOOK_INFO_UNSPEC, + NFNLA_HOOK_INFO_DESC, + NFNLA_HOOK_INFO_TYPE, + __NFNLA_HOOK_INFO_MAX, +}; +#define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) + +/** + * enum nfnl_hook_chaintype - chain type + * + * @NFNL_HOOK_TYPE_NFTABLES nf_tables base chain + */ +enum nfnl_hook_chaintype { + NFNL_HOOK_TYPE_NFTABLES = 0x1, +}; + +#endif /* _NFNL_HOOK_H */ -- cgit v1.2.3 From d409989b59ad0b8d108706db25e17c320a9664eb Mon Sep 17 00:00:00 2001 From: Chen Li Date: Mon, 7 Jun 2021 09:44:35 +0800 Subject: netlink: simplify NLMSG_DATA with NLMSG_HDRLEN The NLMSG_LENGTH(0) may confuse the API users, NLMSG_HDRLEN is much more clear. Besides, some code style problems are also fixed. Signed-off-by: Chen Li Signed-off-by: David S. 
Miller --- include/uapi/linux/netlink.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 3d94269bbfa8..4c0cde075c27 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -91,9 +91,10 @@ struct nlmsghdr { #define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) #define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) -#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_DATA(nlh) ((void *)(((char *)nlh) + NLMSG_HDRLEN)) #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ - (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) + (struct nlmsghdr *)(((char *)(nlh)) + \ + NLMSG_ALIGN((nlh)->nlmsg_len))) #define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ (nlh)->nlmsg_len <= (len)) -- cgit v1.2.3 From 992da01aa932b432ef8dc3885fa76415b5dbe43f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 10 Jun 2021 16:37:37 +0100 Subject: io_uring: change registration/upd/rsrc tagging ABI There are ABI moments about recently added rsrc registration/update and tagging that might become a nuisance in the future. First, IORING_REGISTER_RSRC[_UPD] hide different types of resources under it, so breaks fine control over them by restrictions. It works for now, but once those are wanted under restrictions it would require a rework. It was also inconvenient trying to fit a new resource not supporting all the features (e.g. dynamic update) into the interface, so better to return to IORING_REGISTER_* top level dispatching. Second, register/update were considered to accept a type of resource, however that's not a good idea because there might be several ways of registration of a single resource type, e.g. 
we may want to add non-contig buffers or anything more exquisite as dma mapped memory. So, remove IORING_RSRC_[FILE,BUFFER] out of the ABI, and place them internally for now to limit changes. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9b554897a7c17ad6e3becc48dfed2f7af9f423d5.1623339162.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e1ae46683301..48b4ddcd56ff 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -298,8 +298,12 @@ enum { IORING_UNREGISTER_PERSONALITY = 10, IORING_REGISTER_RESTRICTIONS = 11, IORING_REGISTER_ENABLE_RINGS = 12, - IORING_REGISTER_RSRC = 13, - IORING_REGISTER_RSRC_UPDATE = 14, + + /* extended with tagging */ + IORING_REGISTER_FILES2 = 13, + IORING_REGISTER_FILES_UPDATE2 = 14, + IORING_REGISTER_BUFFERS2 = 15, + IORING_REGISTER_BUFFERS_UPDATE = 16, /* this goes last */ IORING_REGISTER_LAST @@ -312,14 +316,10 @@ struct io_uring_files_update { __aligned_u64 /* __s32 * */ fds; }; -enum { - IORING_RSRC_FILE = 0, - IORING_RSRC_BUFFER = 1, -}; - struct io_uring_rsrc_register { - __u32 type; __u32 nr; + __u32 resv; + __u64 resv2; __aligned_u64 data; __aligned_u64 tags; }; @@ -335,8 +335,8 @@ struct io_uring_rsrc_update2 { __u32 resv; __aligned_u64 data; __aligned_u64 tags; - __u32 type; __u32 nr; + __u32 resv2; }; /* Skip updating fd indexes set to this value in the fd table */ -- cgit v1.2.3 From 9690557e22d63f13534fd167d293ac8ed8b104f9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 10 Jun 2021 16:37:38 +0100 Subject: io_uring: add feature flag for rsrc tags Add IORING_FEAT_RSRC_TAGS indicating that io_uring supports a bunch of new IORING_REGISTER operations, in particular IORING_REGISTER_[FILES[,UPDATE]2,BUFFERS[2,UPDATE]] that support rsrc tagging, and also 
indicating implemented dynamic fixed buffer updates. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9b995d4045b6c6b4ab7510ca124fd25ac2203af7.1623339162.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 48b4ddcd56ff..162ff99ed2cb 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -280,6 +280,7 @@ struct io_uring_params { #define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) #define IORING_FEAT_EXT_ARG (1U << 8) #define IORING_FEAT_NATIVE_WORKERS (1U << 9) +#define IORING_FEAT_RSRC_TAGS (1U << 10) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 6ddb5680085a3eefe0c6267e3514060045a13c95 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 9 Jun 2021 10:27:01 +0800 Subject: audit: remove trailing spaces and tabs Run the following command to find and remove the trailing spaces and tabs: sed -r -i 's/[ \t]+$//' The files to be checked are as follows: kernel/audit* include/linux/audit.h include/uapi/linux/audit.h Signed-off-by: Zhen Lei Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index cd2d8279a5e4..daa481729e9b 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -48,7 +48,7 @@ * 2500 - 2999 future user space (maybe integrity labels and related events) * * Messages from 1000-1199 are bi-directional. 1200-1299 & 2100 - 2999 are - * exclusively user space. 1300-2099 is kernel --> user space + * exclusively user space. 1300-2099 is kernel --> user space * communication. 
*/ #define AUDIT_GET 1000 /* Get status */ @@ -78,7 +78,7 @@ #define AUDIT_LAST_USER_MSG 1199 #define AUDIT_FIRST_USER_MSG2 2100 /* More user space messages */ #define AUDIT_LAST_USER_MSG2 2999 - + #define AUDIT_DAEMON_START 1200 /* Daemon startup record */ #define AUDIT_DAEMON_END 1201 /* Daemon normal stop record */ #define AUDIT_DAEMON_ABORT 1202 /* Daemon error stop record */ -- cgit v1.2.3 From f07b2a5b04d4a50d931a0afe4e3e114ce09a2e4b Mon Sep 17 00:00:00 2001 From: Arseny Krasnov Date: Fri, 11 Jun 2021 14:12:22 +0300 Subject: virtio/vsock: defines and constants for SEQPACKET Add set of defines and constants for SOCK_SEQPACKET support in vsock. Signed-off-by: Arseny Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/uapi/linux/virtio_vsock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 1d57ed3d84d2..3dd3555b2740 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include #include +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { __le64 guest_cid; } __attribute__((packed)); @@ -65,6 +68,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -91,4 +95,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOR = 1, +}; + #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From 00e77ed8e64d5f271c1f015c7153545980d48a76 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 12 Jun 2021 10:20:55 +0200 Subject: rtnetlink: add IFLA_PARENT_[DEV|DEV_BUS]_NAME In some cases, for example in the upcoming WWAN framework changes, there's no natural "parent netdev", so sometimes dummy 
netdevs are created or similar. IFLA_PARENT_DEV_NAME is a new attribute intended to contain a device (sysfs, struct device) name that can be used instead when creating a new netdev, if the rtnetlink family implements it. As suggested by Parav Pandit, we also introduce IFLA_PARENT_DEV_BUS_NAME attribute in order to uniquely identify a device on the system (with bus/name pair). ip-link(8) support for the generic parent device attributes will help us avoid code duplication, so no other link type will require custom code to handle the parent name attribute. E.g. the WWAN interface creation command will look like this: $ ip link add wwan0-1 parent-dev wwan0 type wwan channel-id 1 So, some future subsystem (or driver) FOO will have an interface creation command that looks like this: $ ip link add foo1-3 parent-dev foo1 type foo bar-id 3 baz-type Y Below is an example of dumping link info of a random device with these new attributes: $ ip --details link show wlp0s20f3 4: wlp0s20f3: mtu 1500 qdisc noqueue state UP mode DORMANT group default qlen 1000 ... parent_bus pci parent_dev 0000:00:14.3 Co-developed-by: Sergey Ryazanov Signed-off-by: Sergey Ryazanov Co-developed-by: Loic Poulain Signed-off-by: Loic Poulain Suggested-by: Sergey Ryazanov Signed-off-by: Johannes Berg Signed-off-by: David S.
Miller --- include/uapi/linux/if_link.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a5a7f0e64865..4882e81514b6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -341,6 +341,13 @@ enum { IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + __IFLA_MAX }; -- cgit v1.2.3 From 88b710532e53de2466d1033fb1d5125aabf3215a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 12 Jun 2021 10:20:56 +0200 Subject: wwan: add interface creation support Add support to create (and destroy) interfaces via a new rtnetlink kind "wwan". The responsible driver has to use the new wwan_register_ops() to make this possible. Signed-off-by: Johannes Berg Signed-off-by: Sergey Ryazanov Signed-off-by: Loic Poulain Signed-off-by: David S. Miller --- include/uapi/linux/wwan.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/uapi/linux/wwan.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wwan.h b/include/uapi/linux/wwan.h new file mode 100644 index 000000000000..32a2720b4d11 --- /dev/null +++ b/include/uapi/linux/wwan.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2021 Intel Corporation. 
+ */ +#ifndef _UAPI_WWAN_H_ +#define _UAPI_WWAN_H_ + +enum { + IFLA_WWAN_UNSPEC, + IFLA_WWAN_LINK_ID, /* u32 */ + + __IFLA_WWAN_MAX +}; +#define IFLA_WWAN_MAX (__IFLA_WWAN_MAX - 1) + +#endif /* _UAPI_WWAN_H_ */ -- cgit v1.2.3 From 87815ee9d0060a91bdf18266e42837a9adb5972e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 13 Apr 2021 07:09:07 -0700 Subject: cxl/pci: Add media provisioning required commands Some of the commands have already been defined for the support of RAW commands (to be blocked). Unlike their usage in the RAW interface, when used through the supported interface, they will be coordinated and marshalled along with other commands being issued by userspace and the driver itself. That coordination will be added later. The list of commands was determined based on the learnings from libnvdimm and this list is provided directly from Dan. Recommended-by: Dan Williams Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20210413140907.534404-1-ben.widawsky@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 3155382dfc9b..f6e8a005b113 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -29,6 +29,18 @@ ___C(GET_LSA, "Get Label Storage Area"), \ ___C(GET_HEALTH_INFO, "Get Health Info"), \ ___C(GET_LOG, "Get Log"), \ + ___C(SET_PARTITION_INFO, "Set Partition Information"), \ + ___C(SET_LSA, "Set Label Storage Area"), \ + ___C(GET_ALERT_CONFIG, "Get Alert Configuration"), \ + ___C(SET_ALERT_CONFIG, "Set Alert Configuration"), \ + ___C(GET_SHUTDOWN_STATE, "Get Shutdown State"), \ + ___C(SET_SHUTDOWN_STATE, "Set Shutdown State"), \ + ___C(GET_POISON, "Get Poison List"), \ + ___C(INJECT_POISON, "Inject Poison"), \ + ___C(CLEAR_POISON, "Clear Poison"), \ + ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ + 
___C(SCAN_MEDIA, "Scan Media"), \ + ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From e061047684af63f2d4f1338ec73140f6e29eb59f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:21 +0900 Subject: bpf: Support BPF_FUNC_get_socket_cookie() for BPF_PROG_TYPE_SK_REUSEPORT. We will call sock_reuseport.prog for socket migration in the next commit, so the eBPF program has to know which listener is closing to select a new listener. We can currently get a unique ID of each listener in the userspace by calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map. This patch makes the pointer of sk available in sk_reuseport_md so that we can get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program. Suggested-by: Martin KaFai Lau Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20210612123224.12525-9-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2c1ba70abbf1..f3b72588442b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5416,6 +5416,7 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + __bpf_md_ptr(struct bpf_sock *, sk); }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From d5e4ddaeb6ab2c3c7fbb7b247a6d34bb0b18d87e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:22 +0900 Subject: bpf: Support socket migration by eBPF. 
This patch introduces a new bpf_attach_type for BPF_PROG_TYPE_SK_REUSEPORT to check if the attached eBPF program is capable of migrating sockets. When the eBPF program is attached, we run it for socket migration if the expected_attach_type is BPF_SK_REUSEPORT_SELECT_OR_MIGRATE or net.ipv4.tcp_migrate_req is enabled. Currently, the expected_attach_type is not enforced for the BPF_PROG_TYPE_SK_REUSEPORT type of program. Thus, this commit follows the earlier idea in the commit aac3fc320d94 ("bpf: Post-hooks for sys_bind") to fix up the zero expected_attach_type in bpf_prog_load_fixup_attach_type(). Moreover, this patch adds a new field (migrating_sk) to sk_reuseport_md to select a new listener based on the child socket. migrating_sk varies depending on if it is migrating a request in the accept queue or during 3WHS. - accept_queue : sock (ESTABLISHED/SYN_RECV) - 3WHS : request_sock (NEW_SYN_RECV) In the eBPF program, we can select a new listener by BPF_FUNC_sk_select_reuseport(). Also, we can cancel migration by returning SK_DROP. This feature is useful when listeners have different settings at the socket API level or when we want to free resources as soon as possible. - SK_PASS with selected_sk, select it as a new listener - SK_PASS with selected_sk NULL, falls back to the random selection - SK_DROP, cancel the migration. There is a noteworthy point. We select a listening socket in three places, but we do not have struct skb when closing a listener or retransmitting a SYN+ACK. On the other hand, some helper functions do not expect skb to be NULL (e.g. skb_header_pointer() in BPF_FUNC_skb_load_bytes(), skb_tail_pointer() in BPF_FUNC_skb_load_bytes_relative()). So we allocate an empty skb temporarily before running the eBPF program.
Suggested-by: Martin KaFai Lau Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/netdev/20201123003828.xjpjdtk4ygl6tg6h@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/netdev/20201203042402.6cskdlit5f3mw4ru@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/netdev/20201209030903.hhow5r53l6fmozjn@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20210612123224.12525-10-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3b72588442b..bf9252c7381e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -994,6 +994,8 @@ enum bpf_attach_type { BPF_SK_LOOKUP, BPF_XDP, BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, __MAX_BPF_ATTACH_TYPE }; @@ -5416,7 +5418,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. + * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. 
+ */ __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From 776c53c6a448905d8b9b161805b67f82301bfe91 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 4 Jun 2021 15:47:52 +0200 Subject: platform/surface: aggregator_cdev: Add support for forwarding events to user-space Currently, debugging unknown events requires writing a custom driver. This is somewhat difficult, slow to adapt, and not entirely user-friendly for quickly trying to figure out things on devices of some third-party user. We can do better. We already have a user-space interface intended for debugging SAM EC requests, so let's add support for receiving events to that. This commit provides support for receiving events by reading from the controller file. It additionally introduces two new IOCTLs to control which event categories will be forwarded. Specifically, a user-space client can specify which target categories it wants to receive events from by registering the corresponding notifier(s) via the IOCTLs and after that, read the received events by reading from the controller device. Signed-off-by: Maximilian Luz Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210604134755.535590-5-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- include/uapi/linux/surface_aggregator/cdev.h | 41 ++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/surface_aggregator/cdev.h b/include/uapi/linux/surface_aggregator/cdev.h index fbcce04abfe9..4f393fafc235 100644 --- a/include/uapi/linux/surface_aggregator/cdev.h +++ b/include/uapi/linux/surface_aggregator/cdev.h @@ -6,7 +6,7 @@ * device. This device provides direct user-space access to the SSAM EC. * Intended for debugging and development. 
* - * Copyright (C) 2020 Maximilian Luz + * Copyright (C) 2020-2021 Maximilian Luz */ #ifndef _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H @@ -73,6 +73,43 @@ struct ssam_cdev_request { } response; } __attribute__((__packed__)); -#define SSAM_CDEV_REQUEST _IOWR(0xA5, 1, struct ssam_cdev_request) +/** + * struct ssam_cdev_notifier_desc - Notifier descriptor. + * @priority: Priority value determining the order in which notifier + * callbacks will be called. A higher value means higher + * priority, i.e. the associated callback will be executed + * earlier than other (lower priority) callbacks. + * @target_category: The event target category for which this notifier should + * receive events. + * + * Specifies the notifier that should be registered or unregistered, + * specifically with which priority and for which target category of events. + */ +struct ssam_cdev_notifier_desc { + __s32 priority; + __u8 target_category; +} __attribute__((__packed__)); + +/** + * struct ssam_cdev_event - SSAM event sent by the EC. + * @target_category: Target category of the event source. See &enum ssam_ssh_tc. + * @target_id: Target ID of the event source. + * @command_id: Command ID of the event. + * @instance_id: Instance ID of the event source. + * @length: Length of the event payload in bytes. + * @data: Event payload data. 
+ */ +struct ssam_cdev_event { + __u8 target_category; + __u8 target_id; + __u8 command_id; + __u8 instance_id; + __u16 length; + __u8 data[]; +} __attribute__((__packed__)); + +#define SSAM_CDEV_REQUEST _IOWR(0xA5, 1, struct ssam_cdev_request) +#define SSAM_CDEV_NOTIF_REGISTER _IOW(0xA5, 2, struct ssam_cdev_notifier_desc) +#define SSAM_CDEV_NOTIF_UNREGISTER _IOW(0xA5, 3, struct ssam_cdev_notifier_desc) #endif /* _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H */ -- cgit v1.2.3 From e8e298a653856b1f3a2bb7b1fe31d3faa93cc7dc Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 4 Jun 2021 15:47:53 +0200 Subject: platform/surface: aggregator_cdev: Allow enabling of events from user-space While events can already be enabled and disabled via the generic request IOCTL, this bypasses the internal reference counting mechanism of the controller. Due to that, disabling an event will turn it off regardless of any other client having requested said event, which may break functionality of that client. To solve this, add IOCTLs wrapping the ssam_controller_event_enable() and ssam_controller_event_disable() functions, which have been previously introduced for this specific purpose. Signed-off-by: Maximilian Luz Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210604134755.535590-6-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- include/uapi/linux/surface_aggregator/cdev.h | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/surface_aggregator/cdev.h b/include/uapi/linux/surface_aggregator/cdev.h index 4f393fafc235..08f46b60b151 100644 --- a/include/uapi/linux/surface_aggregator/cdev.h +++ b/include/uapi/linux/surface_aggregator/cdev.h @@ -90,6 +90,36 @@ struct ssam_cdev_notifier_desc { __u8 target_category; } __attribute__((__packed__)); +/** + * struct ssam_cdev_event_desc - Event descriptor. + * @reg: Registry via which the event will be enabled/disabled. 
+ * @reg.target_category: Target category for the event registry requests. + * @reg.target_id: Target ID for the event registry requests. + * @reg.cid_enable: Command ID for the event-enable request. + * @reg.cid_disable: Command ID for the event-disable request. + * @id: ID specifying the event. + * @id.target_category: Target category of the event source. + * @id.instance: Instance ID of the event source. + * @flags: Flags used for enabling the event. + * + * Specifies which event should be enabled/disabled and how to do that. + */ +struct ssam_cdev_event_desc { + struct { + __u8 target_category; + __u8 target_id; + __u8 cid_enable; + __u8 cid_disable; + } reg; + + struct { + __u8 target_category; + __u8 instance; + } id; + + __u8 flags; +} __attribute__((__packed__)); + /** * struct ssam_cdev_event - SSAM event sent by the EC. * @target_category: Target category of the event source. See &enum ssam_ssh_tc. @@ -111,5 +141,7 @@ struct ssam_cdev_event { #define SSAM_CDEV_REQUEST _IOWR(0xA5, 1, struct ssam_cdev_request) #define SSAM_CDEV_NOTIF_REGISTER _IOW(0xA5, 2, struct ssam_cdev_notifier_desc) #define SSAM_CDEV_NOTIF_UNREGISTER _IOW(0xA5, 3, struct ssam_cdev_notifier_desc) +#define SSAM_CDEV_EVENT_ENABLE _IOW(0xA5, 4, struct ssam_cdev_event_desc) +#define SSAM_CDEV_EVENT_DISABLE _IOW(0xA5, 5, struct ssam_cdev_event_desc) #endif /* _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H */ -- cgit v1.2.3 From 8c40602b4be17571dfd75102f4f1e690311c5210 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 16 Jun 2021 16:52:56 +0200 Subject: net/smc: Add netlink support for SMC statistics Add the netlink function which collects the statistics information and delivers it to the userspace. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: David S. 
Miller --- include/uapi/linux/smc.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 3e68da07fba2..f32f11b30963 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -47,6 +47,7 @@ enum { SMC_NETLINK_GET_LGR_SMCD, SMC_NETLINK_GET_DEV_SMCD, SMC_NETLINK_GET_DEV_SMCR, + SMC_NETLINK_GET_STATS, }; /* SMC_GENL_FAMILY top level attributes */ @@ -58,6 +59,7 @@ enum { SMC_GEN_LGR_SMCD, /* nest */ SMC_GEN_DEV_SMCD, /* nest */ SMC_GEN_DEV_SMCR, /* nest */ + SMC_GEN_STATS, /* nest */ __SMC_GEN_MAX, SMC_GEN_MAX = __SMC_GEN_MAX - 1 }; @@ -159,4 +161,71 @@ enum { SMC_NLA_DEV_MAX = __SMC_NLA_DEV_MAX - 1 }; +/* SMC_NLA_STATS_T_TX(RX)_RMB_SIZE nested attributes */ +/* SMC_NLA_STATS_TX(RX)PLOAD_SIZE nested attributes */ +enum { + SMC_NLA_STATS_PLOAD_PAD, + SMC_NLA_STATS_PLOAD_8K, /* u64 */ + SMC_NLA_STATS_PLOAD_16K, /* u64 */ + SMC_NLA_STATS_PLOAD_32K, /* u64 */ + SMC_NLA_STATS_PLOAD_64K, /* u64 */ + SMC_NLA_STATS_PLOAD_128K, /* u64 */ + SMC_NLA_STATS_PLOAD_256K, /* u64 */ + SMC_NLA_STATS_PLOAD_512K, /* u64 */ + SMC_NLA_STATS_PLOAD_1024K, /* u64 */ + SMC_NLA_STATS_PLOAD_G_1024K, /* u64 */ + __SMC_NLA_STATS_PLOAD_MAX, + SMC_NLA_STATS_PLOAD_MAX = __SMC_NLA_STATS_PLOAD_MAX - 1 +}; + +/* SMC_NLA_STATS_T_TX(RX)_RMB_STATS nested attributes */ +enum { + SMC_NLA_STATS_RMB_PAD, + SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT, /* u64 */ + SMC_NLA_STATS_RMB_SIZE_SM_CNT, /* u64 */ + SMC_NLA_STATS_RMB_FULL_PEER_CNT, /* u64 */ + SMC_NLA_STATS_RMB_FULL_CNT, /* u64 */ + SMC_NLA_STATS_RMB_REUSE_CNT, /* u64 */ + SMC_NLA_STATS_RMB_ALLOC_CNT, /* u64 */ + SMC_NLA_STATS_RMB_DGRADE_CNT, /* u64 */ + __SMC_NLA_STATS_RMB_MAX, + SMC_NLA_STATS_RMB_MAX = __SMC_NLA_STATS_RMB_MAX - 1 +}; + +/* SMC_NLA_STATS_SMCD_TECH and _SMCR_TECH nested attributes */ +enum { + SMC_NLA_STATS_T_PAD, + SMC_NLA_STATS_T_TX_RMB_SIZE, /* nest */ + SMC_NLA_STATS_T_RX_RMB_SIZE, /* nest */ + 
SMC_NLA_STATS_T_TXPLOAD_SIZE, /* nest */ + SMC_NLA_STATS_T_RXPLOAD_SIZE, /* nest */ + SMC_NLA_STATS_T_TX_RMB_STATS, /* nest */ + SMC_NLA_STATS_T_RX_RMB_STATS, /* nest */ + SMC_NLA_STATS_T_CLNT_V1_SUCC, /* u64 */ + SMC_NLA_STATS_T_CLNT_V2_SUCC, /* u64 */ + SMC_NLA_STATS_T_SRV_V1_SUCC, /* u64 */ + SMC_NLA_STATS_T_SRV_V2_SUCC, /* u64 */ + SMC_NLA_STATS_T_SENDPAGE_CNT, /* u64 */ + SMC_NLA_STATS_T_SPLICE_CNT, /* u64 */ + SMC_NLA_STATS_T_CORK_CNT, /* u64 */ + SMC_NLA_STATS_T_NDLY_CNT, /* u64 */ + SMC_NLA_STATS_T_URG_DATA_CNT, /* u64 */ + SMC_NLA_STATS_T_RX_BYTES, /* u64 */ + SMC_NLA_STATS_T_TX_BYTES, /* u64 */ + SMC_NLA_STATS_T_RX_CNT, /* u64 */ + SMC_NLA_STATS_T_TX_CNT, /* u64 */ + __SMC_NLA_STATS_T_MAX, + SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1 +}; + +/* SMC_GEN_STATS attributes */ +enum { + SMC_NLA_STATS_PAD, + SMC_NLA_STATS_SMCD_TECH, /* nest */ + SMC_NLA_STATS_SMCR_TECH, /* nest */ + SMC_NLA_STATS_CLNT_HS_ERR_CNT, /* u64 */ + SMC_NLA_STATS_SRV_HS_ERR_CNT, /* u64 */ + __SMC_NLA_STATS_MAX, + SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From f0dd7bf5e33066e554442c509ef6351728b95b51 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 16 Jun 2021 16:52:57 +0200 Subject: net/smc: Add netlink support for SMC fallback statistics Add support to collect more detailed SMC fallback reason statistics and provide these statistics to user space on the netlink interface. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: David S. 
Miller --- include/uapi/linux/smc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index f32f11b30963..0f7f87c70baf 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -48,6 +48,7 @@ enum { SMC_NETLINK_GET_DEV_SMCD, SMC_NETLINK_GET_DEV_SMCR, SMC_NETLINK_GET_STATS, + SMC_NETLINK_GET_FBACK_STATS, }; /* SMC_GENL_FAMILY top level attributes */ @@ -60,6 +61,7 @@ enum { SMC_GEN_DEV_SMCD, /* nest */ SMC_GEN_DEV_SMCR, /* nest */ SMC_GEN_STATS, /* nest */ + SMC_GEN_FBACK_STATS, /* nest */ __SMC_GEN_MAX, SMC_GEN_MAX = __SMC_GEN_MAX - 1 }; @@ -228,4 +230,16 @@ enum { __SMC_NLA_STATS_MAX, SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1 }; + +/* SMC_GEN_FBACK_STATS attributes */ +enum { + SMC_NLA_FBACK_STATS_PAD, + SMC_NLA_FBACK_STATS_TYPE, /* u8 */ + SMC_NLA_FBACK_STATS_SRV_CNT, /* u64 */ + SMC_NLA_FBACK_STATS_CLNT_CNT, /* u64 */ + SMC_NLA_FBACK_STATS_RSN_CODE, /* u32 */ + SMC_NLA_FBACK_STATS_RSN_CNT, /* u16 */ + __SMC_NLA_FBACK_STATS_MAX, + SMC_NLA_FBACK_STATS_MAX = __SMC_NLA_FBACK_STATS_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From 836382dc24717af203ce06703530528827086955 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 16 Jun 2021 22:25:05 +0200 Subject: netfilter: nf_tables: add last expression Add a new optional expression that tells you when last matching on a given rule / set element has happened.
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 19715e2679d1..e94d1fa554cb 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1195,6 +1195,21 @@ enum nft_counter_attributes { }; #define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) +/** + * enum nft_last_attributes - nf_tables last expression netlink attributes + * + * @NFTA_LAST_SET: last update has been set, zero means never updated (NLA_U32) + * @NFTA_LAST_MSECS: milliseconds since last update (NLA_U64) + */ +enum nft_last_attributes { + NFTA_LAST_UNSPEC, + NFTA_LAST_SET, + NFTA_LAST_MSECS, + NFTA_LAST_PAD, + __NFTA_LAST_MAX +}; +#define NFTA_LAST_MAX (__NFTA_LAST_MAX - 1) + /** * enum nft_log_attributes - nf_tables log expression netlink attributes * -- cgit v1.2.3 From fe76421d1da1dcdb3a2cd8428ac40106bff28bc0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Jun 2021 10:19:54 -0600 Subject: io_uring: allow user configurable IO thread CPU affinity io-wq defaults to per-node masks for IO workers. This works fine by default, but isn't particularly handy for workloads that prefer more specific affinities, for either performance or isolation reasons. This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU mask that is then applied to IO thread workers, and an IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the default of per-node. Note that no care is given to existing IO threads, they will need to go through a reschedule before the affinity is correct if they are already running or sleeping. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 162ff99ed2cb..f1f9ac114b51 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -306,6 +306,10 @@ enum { IORING_REGISTER_BUFFERS2 = 15, IORING_REGISTER_BUFFERS_UPDATE = 16, + /* set/clear io-wq thread affinities */ + IORING_REGISTER_IOWQ_AFF = 17, + IORING_UNREGISTER_IOWQ_AFF = 18, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 644f706719f0297bc5f65c8891de1c32f042eae5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 21 May 2021 11:51:36 +0200 Subject: KVM: x86: hyper-v: Introduce KVM_CAP_HYPERV_ENFORCE_CPUID Modeled after KVM_CAP_ENFORCE_PV_FEATURE_CPUID, the new capability allows for limiting Hyper-V features to those exposed to the guest in Hyper-V CPUIDs (0x40000003, 0x40000004, ...). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20210521095204.2161214-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..792816144092 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 6dba940352038b56db9b591b172fb2ec76a5fd5e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 7 Jun 2021 12:02:02 +0300 Subject: KVM: x86: Introduce KVM_GET_SREGS2 / KVM_SET_SREGS2 This is a new version of KVM_GET_SREGS / KVM_SET_SREGS. 
It has the following changes: * Has flags for future extensions * Has vcpu's PDPTRs, allowing to save/restore them on migration. * Lacks obsolete interrupt bitmap (done now via KVM_SET_VCPU_EVENTS) New capability, KVM_CAP_SREGS2 is added to signal the userspace of this ioctl. Signed-off-by: Maxim Levitsky Message-Id: <20210607090203.133058-8-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 792816144092..90d44138dbfb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1084,6 +1084,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 #define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 #ifdef KVM_CAP_IRQ_ROUTING @@ -1622,6 +1623,9 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + struct kvm_xen_vcpu_attr { __u16 type; __u16 pad[3]; -- cgit v1.2.3 From 0dbb11230437895f7cd6fc55da61cef011e997d8 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Tue, 8 Jun 2021 18:05:43 +0000 Subject: KVM: X86: Introduce KVM_HC_MAP_GPA_RANGE hypercall This hypercall is used by the SEV guest to notify a change in the page encryption status to the hypervisor. The hypercall should be invoked only when the encryption attribute is changed from encrypted -> decrypted and vice versa. By default all guest pages are considered encrypted. The hypercall exits to userspace to manage the guest shared regions and integrate with the userspace VMM's migration code. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Paolo Bonzini Cc: Joerg Roedel Cc: Borislav Petkov Cc: Tom Lendacky Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Steve Rutherford Signed-off-by: Brijesh Singh Signed-off-by: Ashish Kalra Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Co-developed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini Message-Id: <90778988e1ee01926ff9cac447aacb745f954c8c.1623174621.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + include/uapi/linux/kvm_para.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 90d44138dbfb..9febe1412f7a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1085,6 +1085,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PTP_KVM 198 #define KVM_CAP_HYPERV_ENFORCE_CPUID 199 #define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index 8b86609849b9..960c7e93d1a9 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -29,6 +29,7 @@ #define KVM_HC_CLOCK_PAIRING 9 #define KVM_HC_SEND_IPI 10 #define KVM_HC_SCHED_YIELD 11 +#define KVM_HC_MAP_GPA_RANGE 12 /* * hypercalls use architecture specific -- cgit v1.2.3 From 2d8ea148e553e1dd4e80a87741abdfb229e2b323 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 17 Jun 2021 11:37:11 +0800 Subject: net: fix mistake path for netdev_features_strings The strings arrays netdev_features_strings, tunable_strings, and phy_tunable_strings have been moved to file net/ethtool/common.c. So fix the comments. Signed-off-by: Jian Shen Signed-off-by: David S.
Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cfef6b08169a..67aa7134b301 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -233,7 +233,7 @@ enum tunable_id { ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tunable attribute above and remember to update - * tunable_strings[] in net/core/ethtool.c + * tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_TUNABLE_COUNT, }; @@ -297,7 +297,7 @@ enum phy_tunable_id { ETHTOOL_PHY_EDPD, /* * Add your fresh new phy tunable attribute above and remember to update - * phy_tunable_strings[] in net/core/ethtool.c + * phy_tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_PHY_TUNABLE_COUNT, }; -- cgit v1.2.3 From 68f5d3f3b6543266b29e047cfaf9842333019b4c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 5 Mar 2021 13:19:58 +0100 Subject: um: add PCI over virtio emulation driver To support testing of PCI/PCIe drivers in UML, add a PCI bus support driver. This driver uses virtio, which in UML is really just vhost-user, to talk to devices, and adds the devices to the virtual PCI bus in the system. Since virtio already allows DMA/bus mastering this really isn't all that hard, of course we need the logic_iomem infrastructure that was added by a previous patch. The protocol to talk to the device has a few fairly simple messages for reading from/writing to config and IO spaces, and messages for the device to send the various interrupts (INT#, MSI/MSI-X and while suspended PME#). Note that currently no official virtio device ID is assigned for this protocol, as a consequence this patch requires defining it in the Kconfig, with a default that makes the driver refuse to work at all.
Finally, in order to add support for MSI/MSI-X interrupts, some small changes are needed in the UML IRQ code, it needs to have more interrupts, changing NR_IRQS from 64 to 128 if this driver is enabled, but not actually use them for anything so that the generic IRQ domain/MSI infrastructure can allocate IRQ numbers. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- include/uapi/linux/virtio_pcidev.h | 64 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 include/uapi/linux/virtio_pcidev.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h new file mode 100644 index 000000000000..89daa88bcfef --- /dev/null +++ b/include/uapi/linux/virtio_pcidev.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg + */ +#ifndef _UAPI_LINUX_VIRTIO_PCIDEV_H +#define _UAPI_LINUX_VIRTIO_PCIDEV_H +#include + +/** + * enum virtio_pcidev_ops - virtual PCI device operations + * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; + * the @data field should be filled in by the device (in little endian). + * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; + * the @data field contains the data to write (in little endian). + * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable; + * the @data field should be filled in by the device (in little endian). + * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable; + * the @data field contains the data to write (in little endian). 
+ * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but + * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) + * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for + * the number + * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports + * the 16- or 32-bit write that would otherwise be done into memory, + * analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above + * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be + * all zeroes) to signal the PME# pin. + */ +enum virtio_pcidev_ops { + VIRTIO_PCIDEV_OP_RESERVED = 0, + VIRTIO_PCIDEV_OP_CFG_READ, + VIRTIO_PCIDEV_OP_CFG_WRITE, + VIRTIO_PCIDEV_OP_MMIO_READ, + VIRTIO_PCIDEV_OP_MMIO_WRITE, + VIRTIO_PCIDEV_OP_MMIO_MEMSET, + VIRTIO_PCIDEV_OP_INT, + VIRTIO_PCIDEV_OP_MSI, + VIRTIO_PCIDEV_OP_PME, +}; + +/** + * struct virtio_pcidev_msg - virtio PCI device operation + * @op: the operation to do + * @bar: the bar (only with BAR read/write messages) + * @reserved: reserved + * @size: the size of the read/write (in bytes) + * @addr: the address to read/write + * @data: the data, normally @size long, but just one byte for + * %VIRTIO_PCIDEV_OP_MMIO_MEMSET + * + * Note: the fields are all in native (CPU) endian, however, the + * @data values will often be in little endian (see the ops above.) + */ +struct virtio_pcidev_msg { + __u8 op; + __u8 bar; + __u16 reserved; + __u32 size; + __u64 addr; + __u8 data[]; +}; + +#endif /* _UAPI_LINUX_VIRTIO_PCIDEV_H */ -- cgit v1.2.3 From 8b532109bf885b7b59b93487bc4672eb6d071b78 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Thu, 17 Jun 2021 19:16:44 +0200 Subject: seg6: add support for SRv6 End.DT46 Behavior IETF RFC 8986 [1] includes the definition of SRv6 End.DT4, End.DT6, and End.DT46 Behaviors. The current SRv6 code in the Linux kernel only implements End.DT4 and End.DT6 which can be used respectively to support IPv4-in-IPv6 and IPv6-in-IPv6 VPNs. 
With End.DT4 and End.DT6 it is not possible to create a single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. The proposed End.DT46 implementation is meant to support the decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. The implementation of the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies the setup and operations of SRv6 VPNs. The SRv6 End.DT46 Behavior leverages the infrastructure of SRv6 End.DT{4,6} Behaviors implemented so far, because it makes use of a VRF device in order to force the routing lookup into the associated routing table. To make the End.DT46 work properly, it must be guaranteed that the routing table used for routing lookup operations is bound to one and only one VRF during the tunnel creation. Such constraint has to be enforced by enabling the VRF strict_mode sysctl parameter, i.e.: $ sysctl -wq net.vrf.strict_mode=1 Note that the same approach is used for the SRv6 End.DT4 Behavior and for the End.DT6 Behavior in VRF mode. The command used to instantiate an SRv6 End.DT46 Behavior is straightforward, i.e.: $ ip -6 route add 2001:db8::1 encap seg6local action End.DT46 vrftable 100 dev vrf100. [1] https://www.rfc-editor.org/rfc/rfc8986.html#name-enddt46-decapsulation-and-s ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Performance and impact of SRv6 End.DT46 Behavior on the SRv6 Networking ======================================================================= This patch aims to add the SRv6 End.DT46 Behavior with minimal impact on the performance of SRv6 End.DT4 and End.DT6 Behaviors. In order to verify this, we tested the performance of the newly introduced SRv6 End.DT46 Behavior and compared it with the performance of SRv6 End.DT{4,6} Behaviors, considering both the patched kernel and the kernel before applying the End.DT46 patch (referred to as vanilla kernel). 
In details, the following decapsulation scenarios were considered: 1.a) IPv6 traffic in SRv6 End.DT46 Behavior on patched kernel; 1.b) IPv4 traffic in SRv6 End.DT46 Behavior on patched kernel; 2.a) SRv6 End.DT6 Behavior (VRF mode) on patched kernel; 2.b) SRv6 End.DT4 Behavior on patched kernel; 3.a) SRv6 End.DT6 Behavior (VRF mode) on vanilla kernel (without the End.DT46 patch); 3.b) SRv6 End.DT4 Behavior on vanilla kernel (without the End.DT46 patch). All tests were performed on a testbed deployed on the CloudLab [2] facilities. We considered IPv{4,6} traffic handled by a single core (at 2.4 GHz on a Xeon(R) CPU E5-2630 v3) on kernel 5.13-rc1 using packets of size ~ 100 bytes. Scenario (1.a): average 684.70 kpps; std. dev. 0.7 kpps; Scenario (1.b): average 711.69 kpps; std. dev. 1.2 kpps; Scenario (2.a): average 690.70 kpps; std. dev. 1.2 kpps; Scenario (2.b): average 722.22 kpps; std. dev. 1.7 kpps; Scenario (3.a): average 690.02 kpps; std. dev. 2.6 kpps; Scenario (3.b): average 721.91 kpps; std. dev. 1.2 kpps; Considering the results for the patched kernel (1.a, 1.b, 2.a, 2.b) we observe that the performance degradation incurred in using End.DT46 rather than End.DT6 and End.DT4 respectively for IPv6 and IPv4 traffic is minimal, around 0.9% and 1.5%. Such very minimal performance degradation is the price to be paid if one prefers to use a single tunnel capable of handling both types of traffic (IPv4 and IPv6). Comparing the results for End.DT4 and End.DT6 under the patched and the vanilla kernel (2.a, 2.b, 3.a, 3.b) we observe that the introduction of the End.DT46 patch has no impact on the performance of End.DT4 and End.DT6. [2] https://www.cloudlab.us Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/seg6_local.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 5ae3ace84de0..332b18f318f8 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -64,6 +64,8 @@ enum { SEG6_LOCAL_ACTION_END_AM = 14, /* custom BPF action */ SEG6_LOCAL_ACTION_END_BPF = 15, + /* decap and lookup of DA in v4 or v6 table */ + SEG6_LOCAL_ACTION_END_DT46 = 16, __SEG6_LOCAL_ACTION_MAX, }; -- cgit v1.2.3 From 752e906732c69412087f716e93baa0330cb7cce3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:07 -0700 Subject: mptcp: add csum_enabled in mptcp_sock This patch added a new member named csum_enabled in struct mptcp_sock, used a dummy mptcp_is_checksum_enabled() helper to initialize it. Also added a new member named mptcpi_csum_enabled in struct mptcp_info to expose the csum_enabled flag. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 8eb3c0844bff..7b05f7102321 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -105,6 +105,7 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; }; /* -- cgit v1.2.3 From 321827477360934dc040e9d3c626bf1de6c3ab3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 18 Jun 2021 13:04:35 +0200 Subject: icmp: don't send out ICMP messages with a source address of 0.0.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When constructing ICMP response messages, the kernel will try to pick a suitable source address for the outgoing packet. 
However, if no IPv4 addresses are configured on the system at all, this will fail and we end up producing an ICMP message with a source address of 0.0.0.0. This can happen on a box routing IPv4 traffic via v6 nexthops, for instance. Since 0.0.0.0 is not generally routable on the internet, there's a good chance that such ICMP messages will never make it back to the sender of the original packet that the ICMP message was sent in response to. This, in turn, can create connectivity and PMTUd problems for senders. Fortunately, RFC7600 reserves a dummy address to be used as a source for ICMP messages (192.0.0.8/32), so let's teach the kernel to substitute that address as a last resort if the regular source address selection procedure fails. Below is a quick example reproducing this issue with network namespaces: ip netns add ns0 ip l add type veth peer netns ns0 ip l set dev veth0 up ip a add 10.0.0.1/24 dev veth0 ip a add fc00:dead:cafe:42::1/64 dev veth0 ip r add 10.1.0.0/24 via inet6 fc00:dead:cafe:42::2 ip -n ns0 l set dev veth0 up ip -n ns0 a add fc00:dead:cafe:42::2/64 dev veth0 ip -n ns0 r add 10.0.0.0/24 via inet6 fc00:dead:cafe:42::1 ip netns exec ns0 sysctl -w net.ipv4.icmp_ratelimit=0 ip netns exec ns0 sysctl -w net.ipv4.ip_forward=1 tcpdump -tpni veth0 -c 2 icmp & ping -w 1 10.1.0.1 > /dev/null tcpdump: verbose output suppressed, use -v[v]... 
for full protocol decode listening on veth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 29, seq 1, length 64 IP 0.0.0.0 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92 2 packets captured 2 packets received by filter 0 packets dropped by kernel With this patch the above capture changes to: IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 31127, seq 1, length 64 IP 192.0.0.8 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Juliusz Chroboczek Reviewed-by: David Ahern Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 7d6687618d80..d1b327036ae4 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -289,6 +289,9 @@ struct sockaddr_in { /* Address indicating an error return. */ #define INADDR_NONE ((unsigned long int) 0xffffffff) +/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ +#define INADDR_DUMMY ((unsigned long int) 0xc0000008) + /* Network number for local host loopback. */ #define IN_LOOPBACKNET 127 -- cgit v1.2.3 From 1f3c98eaddec857e16a7a1c6cd83317b3dc89438 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Fri, 18 Jun 2021 22:32:47 +0800 Subject: net: add pf_family_names[] for protocol family Modify the pr_info content from int to char *, this looks more readable. Signed-off-by: Yejune Deng Signed-off-by: David S. 
Miller --- include/uapi/linux/net.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h index 4dabec6bd957..a28caaf620c7 100644 --- a/include/uapi/linux/net.h +++ b/include/uapi/linux/net.h @@ -55,4 +55,52 @@ typedef enum { #define __SO_ACCEPTCON (1 << 16) /* performed a listen */ +static const char * const pf_family_names[] = { + [PF_UNSPEC] = "PF_UNSPEC", + [PF_UNIX] = "PF_UNIX/PF_LOCAL", + [PF_INET] = "PF_INET", + [PF_AX25] = "PF_AX25", + [PF_IPX] = "PF_IPX", + [PF_APPLETALK] = "PF_APPLETALK", + [PF_NETROM] = "PF_NETROM", + [PF_BRIDGE] = "PF_BRIDGE", + [PF_ATMPVC] = "PF_ATMPVC", + [PF_X25] = "PF_X25", + [PF_INET6] = "PF_INET6", + [PF_ROSE] = "PF_ROSE", + [PF_DECnet] = "PF_DECnet", + [PF_NETBEUI] = "PF_NETBEUI", + [PF_SECURITY] = "PF_SECURITY", + [PF_KEY] = "PF_KEY", + [PF_NETLINK] = "PF_NETLINK/PF_ROUTE", + [PF_PACKET] = "PF_PACKET", + [PF_ASH] = "PF_ASH", + [PF_ECONET] = "PF_ECONET", + [PF_ATMSVC] = "PF_ATMSVC", + [PF_RDS] = "PF_RDS", + [PF_SNA] = "PF_SNA", + [PF_IRDA] = "PF_IRDA", + [PF_PPPOX] = "PF_PPPOX", + [PF_WANPIPE] = "PF_WANPIPE", + [PF_LLC] = "PF_LLC", + [PF_IB] = "PF_IB", + [PF_MPLS] = "PF_MPLS", + [PF_CAN] = "PF_CAN", + [PF_TIPC] = "PF_TIPC", + [PF_BLUETOOTH] = "PF_BLUETOOTH", + [PF_IUCV] = "PF_IUCV", + [PF_RXRPC] = "PF_RXRPC", + [PF_ISDN] = "PF_ISDN", + [PF_PHONET] = "PF_PHONET", + [PF_IEEE802154] = "PF_IEEE802154", + [PF_CAIF] = "PF_CAIF", + [PF_ALG] = "PF_ALG", + [PF_NFC] = "PF_NFC", + [PF_VSOCK] = "PF_VSOCK", + [PF_KCM] = "PF_KCM", + [PF_QIPCRTR] = "PF_QIPCRTR", + [PF_SMC] = "PF_SMC", + [PF_XDP] = "PF_XDP", +}; + #endif /* _UAPI_LINUX_NET_H */ -- cgit v1.2.3 From 103ebe658a262ef5b5db7f01d83857cf82a087d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jun 2021 13:02:45 -0700 Subject: Revert "net: add pf_family_names[] for protocol family" This reverts commit 1f3c98eaddec857e16a7a1c6cd83317b3dc89438. 
Does not build... Signed-off-by: David S. Miller --- include/uapi/linux/net.h | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h index a28caaf620c7..4dabec6bd957 100644 --- a/include/uapi/linux/net.h +++ b/include/uapi/linux/net.h @@ -55,52 +55,4 @@ typedef enum { #define __SO_ACCEPTCON (1 << 16) /* performed a listen */ -static const char * const pf_family_names[] = { - [PF_UNSPEC] = "PF_UNSPEC", - [PF_UNIX] = "PF_UNIX/PF_LOCAL", - [PF_INET] = "PF_INET", - [PF_AX25] = "PF_AX25", - [PF_IPX] = "PF_IPX", - [PF_APPLETALK] = "PF_APPLETALK", - [PF_NETROM] = "PF_NETROM", - [PF_BRIDGE] = "PF_BRIDGE", - [PF_ATMPVC] = "PF_ATMPVC", - [PF_X25] = "PF_X25", - [PF_INET6] = "PF_INET6", - [PF_ROSE] = "PF_ROSE", - [PF_DECnet] = "PF_DECnet", - [PF_NETBEUI] = "PF_NETBEUI", - [PF_SECURITY] = "PF_SECURITY", - [PF_KEY] = "PF_KEY", - [PF_NETLINK] = "PF_NETLINK/PF_ROUTE", - [PF_PACKET] = "PF_PACKET", - [PF_ASH] = "PF_ASH", - [PF_ECONET] = "PF_ECONET", - [PF_ATMSVC] = "PF_ATMSVC", - [PF_RDS] = "PF_RDS", - [PF_SNA] = "PF_SNA", - [PF_IRDA] = "PF_IRDA", - [PF_PPPOX] = "PF_PPPOX", - [PF_WANPIPE] = "PF_WANPIPE", - [PF_LLC] = "PF_LLC", - [PF_IB] = "PF_IB", - [PF_MPLS] = "PF_MPLS", - [PF_CAN] = "PF_CAN", - [PF_TIPC] = "PF_TIPC", - [PF_BLUETOOTH] = "PF_BLUETOOTH", - [PF_IUCV] = "PF_IUCV", - [PF_RXRPC] = "PF_RXRPC", - [PF_ISDN] = "PF_ISDN", - [PF_PHONET] = "PF_PHONET", - [PF_IEEE802154] = "PF_IEEE802154", - [PF_CAIF] = "PF_CAIF", - [PF_ALG] = "PF_ALG", - [PF_NFC] = "PF_NFC", - [PF_VSOCK] = "PF_VSOCK", - [PF_KCM] = "PF_KCM", - [PF_QIPCRTR] = "PF_QIPCRTR", - [PF_SMC] = "PF_SMC", - [PF_XDP] = "PF_XDP", -}; - #endif /* _UAPI_LINUX_NET_H */ -- cgit v1.2.3 From 2d82ab251ef0f6e7716279b04e9b5a01a86ca530 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 20 May 2021 17:46:54 +0200 Subject: virtiofs: propagate sync() to file server Even if POSIX doesn't mandate it, linux 
users legitimately expect sync() to flush all data and metadata to physical storage when it is located on the same system. This isn't happening with virtiofs though: sync() inside the guest returns right away even though data still needs to be flushed from the host page cache. This is easily demonstrated by doing the following in the guest: $ dd if=/dev/zero of=/mnt/foo bs=1M count=5K ; strace -T -e sync sync 5120+0 records in 5120+0 records out 5368709120 bytes (5.4 GB, 5.0 GiB) copied, 5.22224 s, 1.0 GB/s sync() = 0 <0.024068> and start the following in the host when the 'dd' command completes in the guest: $ strace -T -e fsync /usr/bin/sync virtiofs/foo fsync(3) = 0 <10.371640> There are no good reasons not to honor the expected behavior of sync() actually: it gives an unrealistic impression that virtiofs is super fast and that data has safely landed on HW, which isn't the case obviously. Implement a ->sync_fs() superblock operation that sends a new FUSE_SYNCFS request type for this purpose. Provision a 64-bit placeholder for possible future extensions. Since the file server cannot handle the wait == 0 case, we skip it to avoid a gratuitous roundtrip. Note that this is per-superblock: a FUSE_SYNCFS is sent for the root mount and for each submount. Like with FUSE_FSYNC and FUSE_FSYNCDIR, lack of support for FUSE_SYNCFS in the file server is treated as permanent success. This ensures compatibility with older file servers: the client will get the current behavior of sync() not being propagated to the file server. Note that such an operation allows the file server to DoS sync(). Since a typical FUSE file server is an untrusted piece of software running in userspace, this is disabled by default. Only enable it with virtiofs for now since virtiofsd is supposedly trusted by the guest kernel.
Reported-by: Robert Krawitz Signed-off-by: Greg Kurz Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 271ae90a9bb7..36ed092227fa 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -181,6 +181,9 @@ * - add FUSE_OPEN_KILL_SUIDGID * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT * - add FUSE_SETXATTR_ACL_KILL_SGID + * + * 7.34 + * - add FUSE_SYNCFS */ #ifndef _LINUX_FUSE_H @@ -216,7 +219,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 33 +#define FUSE_KERNEL_MINOR_VERSION 34 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -509,6 +512,7 @@ enum fuse_opcode { FUSE_COPY_FILE_RANGE = 47, FUSE_SETUPMAPPING = 48, FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -971,4 +975,8 @@ struct fuse_removemapping_one { #define FUSE_REMOVEMAPPING_MAX_ENTRY \ (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) +struct fuse_syncfs_in { + uint64_t padding; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From 1a9fd4172d5c8ba64735b3aef7eed643d398ce05 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 21 May 2021 17:42:23 +0200 Subject: btrfs: fix typos in comments Fix typos that have snuck in since the last round. Found by codespell. 
Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 4 ++-- include/uapi/linux/btrfs_tree.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5df73001aad4..22cd037123fa 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -154,7 +154,7 @@ struct btrfs_scrub_progress { __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ __u64 read_errors; /* # of read errors encountered (EIO) */ __u64 csum_errors; /* # of failed csum checks */ - __u64 verify_errors; /* # of occurences, where the metadata + __u64 verify_errors; /* # of occurrences, where the metadata * of a tree block did not match the * expected values, like generation or * logical */ @@ -174,7 +174,7 @@ struct btrfs_scrub_progress { __u64 last_physical; /* last physical address scrubbed. In * case a scrub was aborted, this can * be used to restart the scrub */ - __u64 unverified_errors; /* # of occurences where a read for a + __u64 unverified_errors; /* # of occurrences where a read for a * full (64k) bio failed, but the re- * check succeeded for each 4k piece. * Intermittent error. */ diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 58d7cff9afb1..ccdb40fe40dc 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -59,7 +59,7 @@ /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL -/* orhpan objectid for tracking unlinked/truncated files */ +/* orphan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL /* does write ahead logging to speed up fsyncs */ @@ -275,7 +275,7 @@ #define BTRFS_PERSISTENT_ITEM_KEY 249 /* - * Persistantly stores the device replace state in the device tree. + * Persistently stores the device replace state in the device tree. * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). 
*/ #define BTRFS_DEV_REPLACE_KEY 250 -- cgit v1.2.3 From ea7fc1bb1cd1b92b42b1d9273ce7e231d3dc9321 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:12 +0100 Subject: KVM: arm64: Introduce MTE VM feature Add a new VM feature 'KVM_CAP_ARM_MTE' which enables memory tagging for a VM. This will expose the feature to the guest and automatically tag memory pages touched by the VM as PG_mte_tagged (and clear the tag storage) to ensure that the guest cannot see stale tags, and so that the tags are correctly saved/restored across swap. Actually exposing the new capability to user space happens in a later patch. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price [maz: move VM_SHARED sampling into the critical section] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..d4da58ddcad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_ARM_MTE 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f0376edb1ddcab19a473b4bf1fbd5b6bbed3705b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:15 +0100 Subject: KVM: arm64: Add ioctl to fetch/store tags in a guest The VMM may not wish to have its own mapping of guest memory mapped with PROT_MTE because this causes problems if the VMM has tag checking enabled (the guest controls the tags in physical RAM and it's unlikely the tags are correct for the VMM). Instead add a new ioctl which allows the VMM to easily read/write the tags from guest memory, allowing the VMM's mapping to be non-PROT_MTE while the VMM can still read/write the tags for the purpose of migration.
Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-6-steven.price@arm.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d4da58ddcad7..da1edd2b4046 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1429,6 +1429,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From b87cc116c7e1bc62a84d8c46acd401db179edb11 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 21 Jun 2021 14:20:02 +0530 Subject: KVM: PPC: Book3S HV: Add KVM_CAP_PPC_RPT_INVALIDATE capability Now that we have H_RPT_INVALIDATE fully implemented, enable support for the same via KVM_CAP_PPC_RPT_INVALIDATE KVM capability Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210621085003.904767-6-bharata@linux.ibm.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3fd9a7e9d90c..613198a94c43 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1082,6 +1082,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_PPC_RPT_INVALIDATE 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From bf22a6976897977b0a3f1aeba6823c959fc4fdae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Apr 2021 21:44:23 +0200 Subject: futex: Provide 
FUTEX_LOCK_PI2 to support clock selection The FUTEX_LOCK_PI futex operand uses a CLOCK_REALTIME based absolute timeout since it was implemented, but it does not require that the FUTEX_CLOCK_REALTIME flag is set, because that was introduced later. In theory as none of the user space implementations can set the FUTEX_CLOCK_REALTIME flag on this operand, it would be possible to creatively abuse it and make the meaning inverse, i.e. select CLOCK_REALTIME when not set and CLOCK_MONOTONIC when set. But that's nasty hackery. Another option would be to have a new FUTEX_CLOCK_MONOTONIC flag only for FUTEX_LOCK_PI, but that's also awkward because it does not allow libraries to handle the timeout clock selection consistently. So provide a new FUTEX_LOCK_PI2 operand which implements the timeout semantics which the other operands use and leave FUTEX_LOCK_PI alone. Reported-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210422194705.440773992@linutronix.de --- include/uapi/linux/futex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index a89eb0accd5e..235e5b2facaa 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -21,6 +21,7 @@ #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 +#define FUTEX_LOCK_PI2 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -32,6 +33,7 @@ #define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_LOCK_PI2_PRIVATE (FUTEX_LOCK_PI2 | FUTEX_PRIVATE_FLAG) #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE
(FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG) -- cgit v1.2.3 From 913d026fbfaf114ff87afcc77fa4e9309f87f114 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 22 Jun 2021 09:50:47 +0300 Subject: ethtool: Document correct attribute type 'ETHTOOL_A_MODULE_EEPROM_DATA' is a binary attribute, not a nested one. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 825cfda1c5d5..c7135c9c37a5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -675,7 +675,7 @@ enum { ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* nested */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ __ETHTOOL_A_MODULE_EEPROM_CNT, ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) -- cgit v1.2.3 From 3190b649b4d9391be7bde3edd8e924e451c5d2f6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 22 Jun 2021 14:04:49 -0400 Subject: sctp: add SCTP_PLPMTUD_PROBE_INTERVAL sockopt for sock/asoc/transport With this socket option, users can change probe_interval for a transport, asoc or sock after it's created. Note that if the change is for an asoc, also apply the change to each transport in this asoc. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/uapi/linux/sctp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index cb78e7a739da..c4ff1ebd8bcc 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -141,6 +141,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131 #define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE #define SCTP_REMOTE_UDP_ENCAPS_PORT 132 +#define SCTP_PLPMTUD_PROBE_INTERVAL 133 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1213,4 +1214,11 @@ enum sctp_sched_type { SCTP_SS_MAX = SCTP_SS_RR }; +/* Probe Interval socket option */ +struct sctp_probeinterval { + sctp_assoc_t spi_assoc_id; + struct sockaddr_storage spi_address; + __u32 spi_interval; +}; + #endif /* _UAPI_SCTP_H */ -- cgit v1.2.3 From dd3e4fc75b4ab8186a133cfe9d49666a2f8186e0 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 18 Jun 2021 13:41:36 +0300 Subject: nl80211/cfg80211: add BSS color to NDP ranging parameters In NDP ranging, the initiator needs to set the BSS color in the NDP to the BSS color of the responder. Add the BSS color as a parameter for NDP ranging.
Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210618133832.f097a6144b59.I27dec8b994df52e691925ea61be4dd4fa6d396c0@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f962c06e9818..771f238ccff1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -6912,6 +6912,9 @@ enum nl80211_peer_measurement_ftm_capa { * @NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK: negotiate for LMR feedback. Only * valid if either %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED or * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set. + * @NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR: optional. The BSS color of the + * responder. Only valid if %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED + * or %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED is set. 
* * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number @@ -6931,6 +6934,7 @@ enum nl80211_peer_measurement_ftm_req { NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED, NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED, NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK, + NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR, /* keep last */ NUM_NL80211_PMSR_FTM_REQ_ATTR, -- cgit v1.2.3 From f4f8650588d35deafaa4a4e28cceb3557a71e711 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 18 Jun 2021 13:41:52 +0300 Subject: cfg80211: allow advertising vendor-specific capabilities There may be cases where vendor-specific elements need to be used over the air. Rather than have driver or firmware add them and possibly cause problems that way, add them to the iftype-data band capabilities. This way we can advertise to userspace first, and use them in mac80211 next. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210618133832.e8c4f0347276.Iee5964682b3e9ec51fc1cd57a7c62383eaf6ddd7@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 771f238ccff1..db474994fa73 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3654,6 +3654,8 @@ enum nl80211_mpath_info { * defined * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), * given for all 6 GHz band channels + * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are + * advertised on this band/for this iftype (binary) * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use */ enum nl80211_band_iftype_attr { @@ -3665,6 +3667,7 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, + NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, /* keep last */ 
__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, -- cgit v1.2.3 From 55d444b310c64b084dcc62ba3e4dc3862269fb96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 23 Jun 2021 08:35:29 +0900 Subject: tcp: Add stats for socket migration. This commit adds two stats for the socket migration feature to evaluate the effectiveness: LINUX_MIB_TCPMIGRATEREQ(SUCCESS|FAILURE). If the migration fails because of the own_req race in receiving ACK and sending SYN+ACK paths, we do not increment the failure stat. Then another CPU is responsible for the req. Link: https://lore.kernel.org/bpf/CAK6E8=cgFKuGecTzSCSQ8z3YJ_163C0uwO9yRvfDSE7vOe9mJA@mail.gmail.com/ Suggested-by: Yuchung Cheng Signed-off-by: Kuniyuki Iwashima Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 26fc60ce9298..904909d020e2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -290,6 +290,8 @@ enum LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ + LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ + LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From cb082bfab59a224a49ae803fed52cd03e8d6b5e0 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 18 Jun 2021 22:27:04 +0000 Subject: KVM: stats: Add fd-based API to read binary stats data This commit defines the API for userspace and prepares the common functionalities to support per VM/VCPU binary stats data readings. The KVM stats are currently only accessible through debugfs, which has some shortcomings this change series is supposed to fix: 1. The current debugfs stats solution in KVM could be disabled when kernel Lockdown mode is enabled, which is a potential risk for production. 2.
The current debugfs stats solution in KVM is organized as "one stats per file", it is good for debugging, but not efficient for production. 3. The stats read/clear in current debugfs solution in KVM are protected by the global kvm_lock. Besides that, there are some other benefits with this change: 1. All KVM VM/VCPU stats can be read out in a bulk by one copy to userspace. 2. A schema is used to describe KVM statistics. From userspace's perspective, the KVM statistics are self-describing. 3. With the fd-based solution, a separate telemetry would be able to read KVM stats in a less privileged environment. 4. After the initial setup by reading in stats descriptors, a telemetry only needs to read the stats data itself, no more parsing or setup is needed. Reviewed-by: David Matlack Reviewed-by: Ricardo Koller Reviewed-by: Krish Sadhukhan Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba #arm64 Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-3-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 330835f1005b..f1ba602260f6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1087,6 +1087,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SREGS2 200 #define KVM_CAP_EXIT_HYPERCALL 201 #define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 #ifdef KVM_CAP_IRQ_ROUTING @@ -1906,4 +1907,76 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. 
The memory is allocated + * at the end of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointed by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. + */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc.
+ * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * structure kvm or kvm_vcpu. + * @unused: Unused field for future usage. Always 0 for now. + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 19238e75bd8ed8ffe784bf5b37586e77b2093742 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 10 May 2021 07:48:33 -0700 Subject: kvm: x86: Allow userspace to handle emulation errors Add a fallback mechanism to the in-kernel instruction emulator that allows userspace the opportunity to process an instruction the emulator was unable to. When the in-kernel instruction emulator fails to process an instruction it will either inject a #UD into the guest or exit to userspace with exit reason KVM_INTERNAL_ERROR. This is because it does not know how to proceed in an appropriate manner. This feature lets userspace get involved to see if it can figure out a better path forward. Signed-off-by: Aaron Lewis Reviewed-by: David Edmondson Message-Id: <20210510144834.658457-2-aaronlewis@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f1ba602260f6..68c9e6d8bbda 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -280,6 +280,9 @@ struct kvm_xen_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* Flags that describe what fields in emulation_failure hold valid data.
*/ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -383,6 +386,25 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -1088,6 +1110,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXIT_HYPERCALL 201 #define KVM_CAP_PPC_RPT_INVALIDATE 202 #define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 808e9df477757955a9644ca323010339be0c40ee Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Fri, 25 Jun 2021 20:36:55 +0300 Subject: userfaultfd: uapi: fix UFFDIO_CONTINUE ioctl request definition This ioctl request reads from uffdio_continue structure written by userspace which justifies _IOC_WRITE flag. It also writes back to that structure which justifies _IOC_READ flag. See NOTEs in include/uapi/asm-generic/ioctl.h for more information. Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") Signed-off-by: Gleb Fotengauer-Malinovskiy Acked-by: Peter Xu Reviewed-by: Axel Rasmussen Reviewed-by: Dmitry V. 
Levin Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index bafbeb1a2624..650480f41f1d 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -80,8 +80,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) -#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ - struct uffdio_continue) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { -- cgit v1.2.3 From 0ae71c7720e3ae3aabd2e8a072d27f7bd173d25c Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Mon, 17 May 2021 12:39:07 -0700 Subject: seccomp: Support atomic "addfd + send reply" Alban Crequy reported a race condition userspace faces when we want to add some fds and make the syscall return them[1] using seccomp notify. The problem is that currently two different ioctl() calls are needed by the process handling the syscalls (agent) for another userspace process (target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible for the agent to do the first ioctl to add a file descriptor but the target is interrupted (EINTR) before the agent does the second ioctl() call. This patch adds a flag to the ADDFD ioctl() so it adds the fd and returns that value atomically to the target program, as suggested by Kees Cook[2]. This is done by simply allowing seccomp_do_user_notification() to add the fd and return it in this case. Therefore, in this case the target wakes up from the wait in seccomp_do_user_notification() either to interrupt the syscall or to add the fd and return it. 
This "allocate an fd and return" functionality is useful for syscalls that return a file descriptor only, like connect(2). Other syscalls that return a file descriptor but not as return value (or return more than one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not work with this flag. This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and SECCOMP_IOCTL_NOTIF_SEND into an atomic operation. Neither the notification's return value nor its error can be set by the user. Upon successful invocation of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND flag, the notifying process's errno will be 0, and the return value will be the file descriptor number that was installed. [1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxta-KQ@mail.gmail.com/ [2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/ Signed-off-by: Rodrigo Campos Signed-off-by: Sargun Dhillon Acked-by: Tycho Andersen Acked-by: Christian Brauner Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me --- include/uapi/linux/seccomp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 6ba18b82a02e..78074254ab98 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -115,6 +115,7 @@ struct seccomp_notif_resp { /* valid flags for seccomp_notif_addfd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ /** * struct seccomp_notif_addfd -- cgit v1.2.3 From 9ba6a1c06279ce499fcf755d8134d679a1f3b4ed Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 24 Jun 2021 15:09:59 +0100 Subject: io_uring: simplify struct io_uring_sqe layout Flatten struct io_uring_sqe, the last union is exactly 64B, so move them out of union { struct { ... }}, and decrease __pad2 size.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2e21ef7aed136293d654450bc3088973a8adc730.1624543113.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f1f9ac114b51..79126d5cd289 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -46,21 +46,17 @@ struct io_uring_sqe { __u32 unlink_flags; }; __u64 user_data; /* data to be passed back at completion time */ + /* pack this to avoid bogus arm OABI complaints */ union { - struct { - /* pack this to avoid bogus arm OABI complaints */ - union { - /* index into fixed buffers, if used */ - __u16 buf_index; - /* for grouped buffer selection */ - __u16 buf_group; - } __attribute__((packed)); - /* personality to use, if used */ - __u16 personality; - __s32 splice_fd_in; - }; - __u64 __pad2[3]; - }; + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } __attribute__((packed)); + /* personality to use, if used */ + __u16 personality; + __s32 splice_fd_in; + __u64 __pad2[2]; }; enum { -- cgit v1.2.3 From 6497ef8df568afbf5f3e38825a4590ff41611a54 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 29 Apr 2021 15:58:28 +0530 Subject: nbd: provide a way for userspace processes to identify device backends Problem: On reconfigure of device, there is no way to defend if the backend storage is matching with the initial backend storage. Say, if an initial connect request for backend "pool1/image1" got mapped to /dev/nbd0 and the userspace process is terminated. 
A next reconfigure request within NBD_ATTR_DEAD_CONN_TIMEOUT is allowed to use /dev/nbd0 for a different backend "pool1/image2" For example, an operation like below could be dangerous: $ sudo rbd-nbd map --try-netlink rbd-pool/ext4-image /dev/nbd0 $ sudo blkid /dev/nbd0 /dev/nbd0: UUID="bfc444b4-64b1-418f-8b36-6e0d170cfc04" TYPE="ext4" $ sudo pkill -9 rbd-nbd $ sudo rbd-nbd attach --try-netlink --device /dev/nbd0 rbd-pool/xfs-image /dev/nbd0 $ sudo blkid /dev/nbd0 /dev/nbd0: UUID="d29bf343-6570-4069-a9ea-2fa156ced908" TYPE="xfs" Solution: Provide a way for userspace processes to keep some metadata to identify between the device and the backend, so that when a reconfigure request is made, we can compare and avoid such dangerous operations. With this solution, as part of the initial connect request, backend path can be stored in the sysfs per device config, so that on a reconfigure request it's easy to check if the backend path matches with the initial connect backend path. Please note, ioctl interface to nbd will not have these changes, as there won't be any reconfigure. 
Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Xiubo Li Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210429102828.31248-1-prasanna.kalever@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/nbd-netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index c5d0ef7aa7d5..2d0b90964227 100644 --- a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -35,6 +35,7 @@ enum { NBD_ATTR_SOCKETS, NBD_ATTR_DEAD_CONN_TIMEOUT, NBD_ATTR_DEVICE_LIST, + NBD_ATTR_BACKEND_IDENTIFIER, __NBD_ATTR_MAX, }; #define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) -- cgit v1.2.3 From 964ab0040ff9598783bf37776b5e31b27b50e293 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Wed, 30 Jun 2021 18:49:27 -0700 Subject: userfaultfd/shmem: advertise shmem minor fault support Now that the feature is fully implemented (the faulting path hooks exist so userspace is notified, and the ioctl to resolve such faults is available), advertise this as a supported feature. Link: https://lkml.kernel.org/r/20210503180737.2487560-6-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Hugh Dickins Acked-by: Peter Xu Cc: Alexander Viro Cc: Andrea Arcangeli Cc: Brian Geffon Cc: "Dr . David Alan Gilbert" Cc: Jerome Glisse Cc: Joe Perches Cc: Kirill A. 
Shutemov Cc: Lokesh Gidra Cc: Mike Kravetz Cc: Mike Rapoport Cc: Mina Almasry Cc: Oliver Upton Cc: Shaohua Li Cc: Shuah Khan Cc: Stephen Rothwell Cc: Wang Qing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 650480f41f1d..05b31d60acf6 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -31,7 +31,8 @@ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ - UFFD_FEATURE_MINOR_HUGETLBFS) + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -185,6 +186,9 @@ struct uffdio_api { * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults * can be intercepted (via REGISTER_MODE_MINOR) for * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -196,6 +200,7 @@ struct uffdio_api { #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; -- cgit v1.2.3 From 7858d7bca7fbbbbd5b940d2ec371b2d060b21b84 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 Jun 2021 18:51:00 -0700 Subject: mm/mempolicy: don't handle MPOL_LOCAL like a fake MPOL_PREFERRED policy MPOL_LOCAL policy has been setup as a real policy, but it is still handled like a faked POL_PREFERRED policy with one internal MPOL_F_LOCAL flag bit set, and there are many places having to judge the real 'prefer' or the 'local' policy, which are quite confusing. 
In current code, there are 4 cases that MPOL_LOCAL are used: 1. user specifies 'local' policy 2. user specifies 'prefer' policy, but with empty nodemask 3. system 'default' policy is used 4. 'prefer' policy + valid 'preferred' node with MPOL_F_STATIC_NODES flag set, and when it is 'rebind' to a nodemask which doesn't contains the 'preferred' node, it will perform as 'local' policy So make 'local' a real policy instead of a fake 'prefer' one, and kill MPOL_F_LOCAL bit, which can greatly reduce the confusion for code reading. For case 4, the logic of mpol_rebind_preferred() is confusing, as Michal Hocko pointed out: : I do believe that rebinding preferred policy is just bogus and it should : be dropped altogether on the ground that a preference is a mere hint from : userspace where to start the allocation. Unless I am missing something : cpusets will be always authoritative for the final placement. The : preferred node just acts as a starting point and it should be really : preserved when cpusets changes. Otherwise we have a very subtle behavior : corner cases. So dump all the tricky transformation between 'prefer' and 'local', and just record the new nodemask of rebinding. 
[feng.tang@intel.com: fix a problem in mpol_set_nodemask(), per Michal Hocko] Link: https://lkml.kernel.org/r/1622560492-1294-3-git-send-email-feng.tang@intel.com [feng.tang@intel.com: refine code and comments of mpol_set_nodemask(), per Michal] Link: https://lkml.kernel.org/r/20210603081807.GE56979@shbuild999.sh.intel.com Link: https://lkml.kernel.org/r/1622469956-82897-3-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Ben Widawsky Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/mempolicy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 4832fd0b5642..19a00bc7fe86 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -60,7 +60,6 @@ enum { * are never OR'ed into the mode in mempolicy API arguments. */ #define MPOL_F_SHARED (1 << 0) /* identify shared policies */ -#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ -- cgit v1.2.3 From d61914ea6adabde9126b0bed64a7a3a42249435e Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 10 May 2021 16:10:14 +0800 Subject: virtio: update virtio id table, add transitional ids This commit updates virtio id table by adding transitional device ids Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20210510081015.4212-2-lingshan.zhu@intel.com Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/virtio_ids.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 4fe842c3a3a9..70a8057ad4bb 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -57,4 +57,16 @@ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ +/* + * Virtio Transitional IDs + */ + +#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ +#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ +#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ +#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ +#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ +#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ +#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ + #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From 347269c113f10fbe893f11dd3ae5f44aa15d3111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sat, 3 Jul 2021 15:13:02 +0000 Subject: PCI: Fix kernel-doc formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc formatting throughout drivers/pci and related include files. No change to functionality intended. 
Check for warnings: $ find include drivers/pci -type f -path "*pci*.[ch]" | xargs scripts/kernel-doc -none [bhelgaas: squashed to one commit] Link: https://lore.kernel.org/r/20210509030237.368540-1-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-1-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-2-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-3-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-4-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-5-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pcitest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index c3ab4c826297..f9c1af8d141b 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/** +/* * pcitest.h - PCI test uapi defines * * Copyright (C) 2017 Texas Instruments -- cgit v1.2.3 From 1507f51255c9ff07d75909a84e7c0d7f3c4b2f49 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:08:03 -0700 Subject: mm: introduce memfd_secret system call to create "secret" memory areas Introduce "memfd_secret" system call with the ability to create memory areas visible only in the context of the owning process and not mapped not only to other processes but in the kernel page tables as well. The secretmem feature is off by default and the user must explicitly enable it at the boot time. Once secretmem is enabled, the user will be able to create a file descriptor using the memfd_secret() system call. The memory areas created by mmap() calls from this file descriptor will be unmapped from the kernel direct map and they will be only mapped in the page table of the processes that have access to the file descriptor. 
Secretmem is designed to provide the following protections: * Enhanced protection (in conjunction with all the other in-kernel attack prevention systems) against ROP attacks. Secretmem makes "simple" ROP insufficient to perform exfiltration, which increases the required complexity of the attack. Along with other protections like the kernel stack size limit and address space layout randomization which make finding gadgets really hard, absence of any in-kernel primitive for accessing secret memory means the one gadget ROP attack can't work. Since the only way to access secret memory is to reconstruct the missing mapping entry, the attacker has to recover the physical page and insert a PTE pointing to it in the kernel and then retrieve the contents. That takes at least three gadgets which is a level of difficulty beyond most standard attacks. * Prevent cross-process secret userspace memory exposures. Once the secret memory is allocated, the user can't accidentally pass it into the kernel to be transmitted somewhere. The secretmem pages cannot be accessed via the direct map and they are disallowed in GUP. * Harden against exploited kernel flaws. In order to access secretmem, a kernel-side attack would need to either walk the page tables and create new ones, or spawn a new privileged userspace process to perform secrets exfiltration using ptrace. The file descriptor based memory has several advantages over the "traditional" mm interfaces, such as mlock(), mprotect(), madvise(). File descriptor approach allows explicit and controlled sharing of the memory areas, it allows to seal the operations. Besides, file descriptor based memory paves the way for VMMs to remove the secret memory range from the userspace hypervisor process, for instance QEMU.
Andy Lutomirski says: "Getting fd-backed memory into a guest will take some possibly major work in the kernel, but getting vma-backed memory into a guest without mapping it in the host user address space seems much, much worse." memfd_secret() is made a dedicated system call rather than an extension to memfd_create() because its purpose is to allow the user to create more secure memory mappings rather than to simply allow file based access to the memory. Nowadays a new system call cost is negligible while it is way simpler for userspace to deal with clear-cut system calls than with a multiplexer or an overloaded syscall. Moreover, the initial implementation of memfd_secret() is completely distinct from memfd_create() so there is not much sense in overloading memfd_create() to begin with. If there will be a need for code sharing between these implementations it can be easily achieved without a need to adjust user visible APIs. The secret memory remains accessible in the process context using uaccess primitives, but it is not exposed to the kernel otherwise; secret memory areas are removed from the direct map and functions in the follow_page()/get_user_page() family will refuse to return a page that belongs to the secret memory area. Once there will be a use case that will require exposing secretmem to the kernel it will be an opt-in request in the system call flags so that user would have to decide what data can be exposed to the kernel. Removing of the pages from the direct map may cause its fragmentation on architectures that use large pages to map the physical memory which affects the system performance. However, the original Kconfig text for CONFIG_DIRECT_GBPAGES said that gigabyte pages in the direct map "... can improve the kernel's performance a tiny bit ..." (commit 00d1c5e05736 ("x86: add gbpages switches")) and the recent report [1] showed that "...
although 1G mappings are a good default choice, there is no compelling evidence that it must be the only choice". Hence, it is sufficient to have secretmem disabled by default with the ability of a system administrator to enable it at boot time. Pages in the secretmem regions are unevictable and unmovable to avoid accidental exposure of the sensitive data via swap or during page migration. Since the secretmem mappings are locked in memory they cannot exceed RLIMIT_MEMLOCK. Since these mappings are already locked independently from mlock(), an attempt to mlock()/munlock() secretmem range would fail and mlockall()/munlockall() will ignore secretmem mappings. However, unlike mlock()ed memory, secretmem currently behaves more like long-term GUP: secretmem mappings are unmovable mappings directly consumed by user space. With default limits, there is no excessive use of secretmem and it poses no real problem in combination with ZONE_MOVABLE/CMA, but in the future this should be addressed to allow balanced use of large amounts of secretmem along with ZONE_MOVABLE/CMA. A page that was a part of the secret memory area is cleared when it is freed to ensure the data is not exposed to the next user of that page. The following example demonstrates creation of a secret mapping (error handling is omitted): fd = memfd_secret(0); ftruncate(fd, MAP_SIZE); ptr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); [1] https://lore.kernel.org/linux-mm/213b4567-46ce-f116-9cdf-bbd0c884eb3c@linux.intel.com/ [akpm@linux-foundation.org: suppress Kconfig whine] Link: https://lkml.kernel.org/r/20210518072034.31572-5-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Hagen Paul Pfeifer Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: Elena Reshetova Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. 
Shutemov" Cc: Matthew Wilcox Cc: Mark Rutland Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: David Hildenbrand Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f3956fc11de6..35687dcb1a42 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -97,5 +97,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 +#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3