From 1ff79a4a49c239dedd67a12a16a8c3a8b53cf838 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Wed, 16 Dec 2020 12:42:01 +0100 Subject: drm/etnaviv: provide more ID values via GET_PARAM ioctl. Make it possible for the user space to access these ID values. Signed-off-by: Christian Gmeiner Signed-off-by: Lucas Stach --- include/uapi/drm/etnaviv_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index 09d0df8b71c5..af024d90453d 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -74,6 +74,9 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS 0x19 #define ETNAVIV_PARAM_GPU_NUM_VARYINGS 0x1a #define ETNAVIV_PARAM_SOFTPIN_START_ADDR 0x1b +#define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c +#define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d +#define ETNAVIV_PARAM_GPU_ECO_ID 0x1e #define ETNA_MAX_PIPES 4 -- cgit v1.2.3 From 2ef6a01fb636997bfec57b015d4e4b5a745ef38a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Apr 2021 11:57:38 +0100 Subject: drm/i915/uapi: fix kernel doc warnings Fix the cases where it is almost already valid kernel doc, for the others just nerf the warnings for now. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Jordan Justen Cc: Daniel Vetter Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210419105741.27844-1-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ddc47bbf48b6..92da48e935d1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1054,12 +1054,12 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; -/** +/* * See drm_i915_gem_execbuffer_ext_timeline_fences. */ #define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 -/** +/* * This structure describes an array of drm_syncobj and associated points for * timeline variants of drm_syncobj. It is invalid to append this structure to * the execbuf if I915_EXEC_FENCE_ARRAY is set. @@ -1700,7 +1700,7 @@ struct drm_i915_gem_context_param { __u64 value; }; -/** +/* * Context SSEU programming * * It may be necessary for either functional or performance reason to configure @@ -2067,7 +2067,7 @@ struct drm_i915_perf_open_param { __u64 properties_ptr; }; -/** +/* * Enable data capture for a stream that was either opened in a disabled state * via I915_PERF_FLAG_DISABLED or was later disabled via * I915_PERF_IOCTL_DISABLE. @@ -2081,7 +2081,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) -/** +/* * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. @@ -2090,7 +2090,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) -/** +/* * Change metrics_set captured by a stream. * * If the stream is bound to a specific context, the configuration change @@ -2103,7 +2103,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) -/** +/* * Common to all i915 perf records */ struct drm_i915_perf_record_header { @@ -2151,7 +2151,7 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; -/** +/* * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { @@ -2292,21 +2292,21 @@ struct drm_i915_query_topology_info { * Describes one engine and it's capabilities as known to the driver. */ struct drm_i915_engine_info { - /** Engine class and instance. */ + /** @engine: Engine class and instance. */ struct i915_engine_class_instance engine; - /** Reserved field. */ + /** @rsvd0: Reserved field. */ __u32 rsvd0; - /** Engine flags. */ + /** @flags: Engine flags. */ __u64 flags; - /** Capabilities of this engine. */ + /** @capabilities: Capabilities of this engine. */ __u64 capabilities; #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) - /** Reserved fields. */ + /** @rsvd1: Reserved fields. */ __u64 rsvd1[4]; }; @@ -2317,13 +2317,13 @@ struct drm_i915_engine_info { * an array of struct drm_i915_engine_info structures. */ struct drm_i915_query_engine_info { - /** Number of struct drm_i915_engine_info structs following. */ + /** @num_engines: Number of struct drm_i915_engine_info structs following. */ __u32 num_engines; - /** MBZ */ + /** @rsvd: MBZ */ __u32 rsvd[3]; - /** Marker for drm_i915_engine_info structures. */ + /** @engines: Marker for drm_i915_engine_info structures. */ struct drm_i915_engine_info engines[]; }; -- cgit v1.2.3 From 19d053d477a145069bc2c38b28dcee773690c804 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Apr 2021 11:57:40 +0100 Subject: drm/i915/uapi: convert i915_user_extension to kernel doc Add some example usage for the extension chaining also, which is quite nifty. v2: (Daniel) - clarify that the name is just some integer, also document that the name space is not global v3: prefer kernel-doc references for structs Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Jordan Justen Cc: Daniel Vetter Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Daniel Vetter Reviewed-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210419105741.27844-3-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 54 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 92da48e935d1..e2867d8cd5e3 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,8 +62,8 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" -/* - * i915_user_extension: Base class for defining a chain of extensions +/** + * struct i915_user_extension - Base class for defining a chain of extensions * * Many interfaces need to grow over time. In most cases we can simply * extend the struct and have userspace pass in more data. Another option, @@ -76,12 +76,58 @@ extern "C" { * increasing complexity, and for large parts of that interface to be * entirely optional. The downside is more pointer chasing; chasing across * the __user boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * */ struct i915_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ __u32 name; - __u32 flags; /* All undefined bits must be zero. */ - __u32 rsvd[4]; /* Reserved for future use; must be zero. */ + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 flags; + /** + * @rsvd: MBZ + * + * Reserved for future use; must be zero. + */ + __u32 rsvd[4]; }; /* -- cgit v1.2.3 From e3bdccafb5bcfab286f4f3d57d35efc4e7368c18 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Apr 2021 11:57:41 +0100 Subject: drm/i915/uapi: convert i915_query and friend to kernel doc Add a note about the two-step process. v2(Tvrtko): - Also document the other method of just passing in a buffer which is large enough, which avoids two ioctl calls. Can make sense for smaller query items. v3: prefer kernel-doc references for structs and members Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Daniel Vetter Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Daniel Vetter Reviewed-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210419105741.27844-4-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 69 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e2867d8cd5e3..6a34243a7646 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2218,53 +2218,94 @@ struct drm_i915_perf_oa_config { __u64 flex_regs_ptr; }; +/** + * struct drm_i915_query_item - An individual query for the kernel to process. + * + * The behaviour is determined by the @query_id. Note that exactly what + * @data_ptr is also depends on the specific @query_id. + */ struct drm_i915_query_item { + /** @query_id: The id for this query */ __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 #define DRM_I915_QUERY_PERF_CONFIG 3 /* Must be kept compact -- no holes and well documented */ - /* + /** + * @length: + * * When set to zero by userspace, this is filled with the size of the - * data to be written at the data_ptr pointer. The kernel sets this + * data to be written at the @data_ptr pointer. The kernel sets this * value to a negative value to signal an error on a particular query * item. */ __s32 length; - /* + /** + * @flags: + * * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. * * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the - * following : - * - DRM_I915_QUERY_PERF_CONFIG_LIST - * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * following: + * + * - DRM_I915_QUERY_PERF_CONFIG_LIST + * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID */ __u32 flags; #define DRM_I915_QUERY_PERF_CONFIG_LIST 1 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 - /* - * Data will be written at the location pointed by data_ptr when the - * value of length matches the length of the data to be written by the + /** + * @data_ptr: + * + * Data will be written at the location pointed by @data_ptr when the + * value of @length matches the length of the data to be written by the * kernel. */ __u64 data_ptr; }; +/** + * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the + * kernel to fill out. + * + * Note that this is generally a two step process for each struct + * drm_i915_query_item in the array: + * + * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct + * drm_i915_query_item, with &drm_i915_query_item.length set to zero. The + * kernel will then fill in the size, in bytes, which tells userspace how + * memory it needs to allocate for the blob(say for an array of properties). + * + * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the + * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that + * the &drm_i915_query_item.length should still be the same as what the + * kernel previously set. At this point the kernel can fill in the blob. + * + * Note that for some query items it can make sense for userspace to just pass + * in a buffer/blob equal to or larger than the required size. In this case only + * a single ioctl call is needed. For some smaller query items this can work + * quite well. + * + */ struct drm_i915_query { + /** @num_items: The number of elements in the @items_ptr array */ __u32 num_items; - /* - * Unused for now. Must be cleared to zero. + /** + * @flags: Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * This points to an array of num_items drm_i915_query_item structures. + /** + * @items_ptr: + * + * Pointer to an array of struct drm_i915_query_item. The number of + * array elements is @num_items. */ __u64 items_ptr; }; -- cgit v1.2.3 From 40ce74d1b28d38e5debc14b5a6ddd9071ae2d310 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 5 Feb 2020 17:59:15 -0500 Subject: drm/amdkfd: add svm ioctl API Add svm (shared virtual memory) ioctl data structure and API definition. The svm ioctl API is designed to be extensible in the future. All operations are provided by a single IOCTL to preserve ioctl number space. The arguments structure ends with a variable size array of attributes that can be used to set or get one or multiple attributes. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 130 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index bf5e7d7846dd..247b57baa94f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -30,9 +30,10 @@ * - 1.1 - initial version * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties + * - 1.5 - Add SVM API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 4 +#define KFD_IOCTL_MINOR_VERSION 5 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -473,6 +474,129 @@ enum kfd_mmio_remap { KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, }; +/* Guarantee host access to memory */ +#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 +/* Fine grained coherency between all devices with access */ +#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002 +/* Use any GPU in same hive as preferred device */ +#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004 +/* GPUs only read, allows replication */ +#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008 +/* Allow execution on GPU */ +#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010 +/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */ +#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020 + +/** + * kfd_ioctl_svm_op - SVM ioctl operations + * + * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes + * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes + */ +enum kfd_ioctl_svm_op { + KFD_IOCTL_SVM_OP_SET_ATTR, + KFD_IOCTL_SVM_OP_GET_ATTR +}; + +/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations + * + * GPU IDs are used to specify GPUs as preferred and prefetch locations. + * Below definitions are used for system memory or for leaving the preferred + * location unspecified. + */ +enum kfd_ioctl_svm_location { + KFD_IOCTL_SVM_LOCATION_SYSMEM = 0, + KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff +}; + +/** + * kfd_ioctl_svm_attr_type - SVM attribute types + * + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for + * system memory + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for + * system memory. Setting this triggers an + * immediate prefetch (migration). + * @KFD_IOCTL_SVM_ATTR_ACCESS: + * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given + * by the attribute value + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see + * KFD_IOCTL_SVM_FLAG_...) + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear + * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity + * (log2 num pages) + */ +enum kfd_ioctl_svm_attr_type { + KFD_IOCTL_SVM_ATTR_PREFERRED_LOC, + KFD_IOCTL_SVM_ATTR_PREFETCH_LOC, + KFD_IOCTL_SVM_ATTR_ACCESS, + KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE, + KFD_IOCTL_SVM_ATTR_NO_ACCESS, + KFD_IOCTL_SVM_ATTR_SET_FLAGS, + KFD_IOCTL_SVM_ATTR_CLR_FLAGS, + KFD_IOCTL_SVM_ATTR_GRANULARITY +}; + +/** + * kfd_ioctl_svm_attribute - Attributes as pairs of type and value + * + * The meaning of the @value depends on the attribute type. + * + * @type: attribute type (see enum @kfd_ioctl_svm_attr_type) + * @value: attribute value + */ +struct kfd_ioctl_svm_attribute { + __u32 type; + __u32 value; +}; + +/** + * kfd_ioctl_svm_args - Arguments for SVM ioctl + * + * @op specifies the operation to perform (see enum + * @kfd_ioctl_svm_op). @start_addr and @size are common for all + * operations. + * + * A variable number of attributes can be given in @attrs. + * @nattr specifies the number of attributes. New attributes can be + * added in the future without breaking the ABI. If unknown attributes + * are given, the function returns -EINVAL. + * + * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address + * range. It may overlap existing virtual address ranges. If it does, + * the existing ranges will be split such that the attribute changes + * only apply to the specified address range. + * + * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes + * over all memory in the given range and returns the result as the + * attribute value. If different pages have different preferred or + * prefetch locations, 0xffffffff will be returned for + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC resepctively. For + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be + * aggregated by bitwise AND. The minimum migration granularity + * throughout the range will be returned for + * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * + * Querying of accessibility attributes works by initializing the + * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the + * GPUID being queried. Multiple attributes can be given to allow + * querying multiple GPUIDs. The ioctl function overwrites the + * attribute type to indicate the access for the specified GPU. + * + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for + * @KFD_IOCTL_SVM_OP_GET_ATTR. + */ +struct kfd_ioctl_svm_args { + __u64 start_addr; + __u64 size; + __u32 op; + __u32 nattr; + /* Variable length array of attributes */ + struct kfd_ioctl_svm_attribute attrs[0]; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -573,7 +697,9 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_SMI_EVENTS \ AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) +#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x20 +#define AMDKFD_COMMAND_END 0x21 #endif -- cgit v1.2.3 From 0f7b5c44d4c53710993e4773bd6eaf171f1888e6 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 28 May 2020 18:27:05 -0500 Subject: drm/amdkfd: add ioctl to configure and query xnack retries Xnack retries are used for page fault recovery. Some AMD chip families support continuously retry while page table entries are invalid. The driver must handle the page fault interrupt and fill in a valid entry for the GPU to continue. This ioctl allows to enable/disable XNACK retries per KFD process. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 43 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 247b57baa94f..3cb5b5dd9f77 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args { struct kfd_ioctl_svm_attribute attrs[0]; }; +/** + * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode + * + * @xnack_enabled: [in/out] Whether to enable XNACK mode for this process + * + * @xnack_enabled indicates whether recoverable page faults should be + * enabled for the current process. 0 means disabled, positive means + * enabled, negative means leave unchanged. If enabled, virtual address + * translations on GFXv9 and later AMD GPUs can return XNACK and retry + * the access until a valid PTE is available. This is used to implement + * device page faults. + * + * On output, @xnack_enabled returns the (new) current mode (0 or + * positive). Therefore, a negative input value can be used to query + * the current mode without changing it. + * + * The XNACK mode fundamentally changes the way SVM managed memory works + * in the driver, with subtle effects on application performance and + * functionality. + * + * Enabling XNACK mode requires shader programs to be compiled + * differently. Furthermore, not all GPUs support changing the mode + * per-process. Therefore changing the mode is only allowed while no + * user mode queues exist in the process. This ensure that no shader + * code is running that may be compiled for the wrong mode. And GPUs + * that cannot change to the requested mode will prevent the XNACK + * mode from occurring. All GPUs used by the process must be in the + * same XNACK mode. + * + * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM. + * Therefore those GPUs are not considered for the XNACK mode switch. + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_set_xnack_mode_args { + __s32 xnack_enabled; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args { #define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) +#define AMDKFD_IOC_SET_XNACK_MODE \ + AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x21 +#define AMDKFD_COMMAND_END 0x22 #endif -- cgit v1.2.3 From 42daecfc2069da98e084e2e2457be2a3d50b0a30 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 22 Apr 2021 17:20:20 +0200 Subject: drm/amdgpu: remove AMDGPU_GEM_CREATE_SHADOW flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused AMDGPU_GEM_CREATE_SHADOW flag. Userspace is never allowed to use this. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 728566542f8a..2063a1c10f79 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -116,8 +116,6 @@ extern "C" { #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) /* Flag that the memory should be in VRAM and cleared */ #define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) -/* Flag that create shadow bo(GTT) while allocating vram bo */ -#define AMDGPU_GEM_CREATE_SHADOW (1 << 4) /* Flag that allocating the BO should use linear VRAM */ #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) /* Flag that BO is always valid in this VM */ -- cgit v1.2.3 From 8f86c82aba8b13e732cfdd6d0e19a7dd48197e43 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Fri, 2 Apr 2021 13:22:12 +0200 Subject: drm/connector: demote connector force-probes for non-master clients Force-probing a connector can be slow and cause flickering. As this affects the global KMS state, let's make it so only the DRM master can force-probe a connector. Non-master DRM clients won't be able to force-probe a connector anymore. Instead, KMS will perform a regular read-only connector query. Signed-off-by: Simon Ser Acked-by: Pekka Paalanen Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210402112212.5625-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a5e76aa06ad5..9b6722d45f36 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -413,9 +413,10 @@ enum drm_mode_subconnector { * * **Force-probing a connector** * - * If the @count_modes field is set to zero, the kernel will perform a forced - * probe on the connector to refresh the connector status, modes and EDID. - * A forced-probe can be slow, might cause flickering and the ioctl will block. + * If the @count_modes field is set to zero and the DRM client is the current + * DRM master, the kernel will perform a forced probe on the connector to + * refresh the connector status, modes and EDID. A forced-probe can be slow, + * might cause flickering and the ioctl will block. * * User-space needs to force-probe connectors to ensure their metadata is * up-to-date at startup and after receiving a hot-plug event. User-space -- cgit v1.2.3 From 710217292a61110a8ccf010cf6886d25e34bf024 Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Thu, 29 Apr 2021 11:30:50 +0100 Subject: drm/i915/query: Expose memory regions through the query uAPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returns the available memory region areas supported by the HW. v2(Daniel & Jason): - Add some kernel-doc, including example usage. - Drop all the extra rsvd v3(Jason & Tvrtko) - add back rsvd Signed-off-by: Abdiel Janulgue Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Thomas Hellström Cc: Daniele Ceraolo Spurio Cc: Lionel Landwerlin Cc: Jon Bloomfield Cc: Jordan Justen Cc: Daniel Vetter Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-3-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 134 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6a34243a7646..582ee6f7d595 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2230,6 +2230,7 @@ struct drm_i915_query_item { #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 #define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 /* Must be kept compact -- no holes and well documented */ /** @@ -2464,6 +2465,139 @@ struct drm_i915_query_perf_config { __u8 data[]; }; +/** + * enum drm_i915_gem_memory_class - Supported memory classes + */ +enum drm_i915_gem_memory_class { + /** @I915_MEMORY_CLASS_SYSTEM: System memory */ + I915_MEMORY_CLASS_SYSTEM = 0, + /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */ + I915_MEMORY_CLASS_DEVICE, +}; + +/** + * struct drm_i915_gem_memory_class_instance - Identify particular memory region + */ +struct drm_i915_gem_memory_class_instance { + /** @memory_class: See enum drm_i915_gem_memory_class */ + __u16 memory_class; + + /** @memory_instance: Which instance */ + __u16 memory_instance; +}; + +/** + * struct drm_i915_memory_region_info - Describes one region as known to the + * driver. + * + * Note that we reserve some stuff here for potential future work. As an example + * we might want expose the capabilities for a given region, which could include + * things like if the region is CPU mappable/accessible, what are the supported + * mapping types etc. + * + * Note that to extend struct drm_i915_memory_region_info and struct + * drm_i915_query_memory_regions in the future the plan is to do the following: + * + * .. code-block:: C + * + * struct drm_i915_memory_region_info { + * struct drm_i915_gem_memory_class_instance region; + * union { + * __u32 rsvd0; + * __u32 new_thing1; + * }; + * ... + * union { + * __u64 rsvd1[8]; + * struct { + * __u64 new_thing2; + * __u64 new_thing3; + * ... + * }; + * }; + * }; + * + * With this things should remain source compatible between versions for + * userspace, even as we add new fields. + * + * Note this is using both struct drm_i915_query_item and struct drm_i915_query. + * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS + * at &drm_i915_query_item.query_id. + */ +struct drm_i915_memory_region_info { + /** @region: The class:instance pair encoding */ + struct drm_i915_gem_memory_class_instance region; + + /** @rsvd0: MBZ */ + __u32 rsvd0; + + /** @probed_size: Memory probed by the driver (-1 = unknown) */ + __u64 probed_size; + + /** @unallocated_size: Estimate of memory remaining (-1 = unknown) */ + __u64 unallocated_size; + + /** @rsvd1: MBZ */ + __u64 rsvd1[8]; +}; + +/** + * struct drm_i915_query_memory_regions + * + * The region info query enumerates all regions known to the driver by filling + * in an array of struct drm_i915_memory_region_info structures. + * + * Example for getting the list of supported regions: + * + * .. code-block:: C + * + * struct drm_i915_query_memory_regions *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_MEMORY_REGIONS; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of regions. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with the all the region info. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each region in the array + * for (i = 0; i < info->num_regions; i++) { + * struct drm_i915_memory_region_info mr = info->regions[i]; + * u16 class = mr.region.class; + * u16 instance = mr.region.instance; + * + * .... + * } + * + * free(info); + */ +struct drm_i915_query_memory_regions { + /** @num_regions: Number of supported regions */ + __u32 num_regions; + + /** @rsvd: MBZ */ + __u32 rsvd[3]; + + /** @regions: Info about each supported region */ + struct drm_i915_memory_region_info regions[]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From ebcb40298947bdb0622e53c69734e6b4fb64b348 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 29 Apr 2021 11:30:52 +0100 Subject: drm/i915/uapi: introduce drm_i915_gem_create_ext Same old gem_create but with now with extensions support. This is needed to support various upcoming usecases. v2:(Chris) - Use separate ioctl number for gem_create_ext, instead of hijacking the existing gem_create ioctl, otherwise we run into the issue with being unable to detect if the kernel supports the new extension behaviour. - We now have gem_create_ext.flags, which should be zeroed. - I915_GEM_CREATE_EXT_SETPARAM value is now zero, since this is the index into our array of extensions. - Setup a "vanilla" object which we can directly apply our extensions to. v3:(Daniel & Jason) - drop I915_GEM_CREATE_EXT_SETPARAM. Instead just have each extension do one thing only, instead of generic setparam which can cover various use cases. - add some kernel-doc. Signed-off-by: Matthew Auld Signed-off-by: CQ Tang Cc: Joonas Lahtinen Cc: Daniele Ceraolo Spurio Cc: Lionel Landwerlin Cc: Jordan Justen Cc: Daniel Vetter Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-5-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 582ee6f7d595..9ab609e0cabb 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -406,6 +406,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_QUERY 0x39 #define DRM_I915_GEM_VM_CREATE 0x3a #define DRM_I915_GEM_VM_DESTROY 0x3b +#define DRM_I915_GEM_CREATE_EXT 0x3c /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -438,6 +439,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) #define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) #define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext) #define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) @@ -2598,6 +2600,46 @@ struct drm_i915_query_memory_regions { struct drm_i915_memory_region_info regions[]; }; +/** + * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added + * extension support using struct i915_user_extension. + * + * Note that in the future we want to have our buffer flags here, at least for + * the stuff that is immutable. Previously we would have two ioctls, one to + * create the object with gem_create, and another to apply various parameters, + * however this creates some ambiguity for the params which are considered + * immutable. Also in general we're phasing out the various SET/GET ioctls. + */ +struct drm_i915_gem_create_ext { + /** + * @size: Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + * + */ + __u64 size; + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + /** @flags: MBZ */ + __u32 flags; + /** + * @extensions: The chain of extensions to apply to this object. + * + * This will be useful in the future when we need to support several + * different extensions, and we need to apply more than one when + * creating the object. See struct i915_user_extension. + * + * If we don't supply any extensions then we get the same old gem_create + * behaviour. + * + */ + __u64 extensions; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 2459e56fd8af0b47fcbfbdff2fdc02e4077680ec Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 29 Apr 2021 11:30:53 +0100 Subject: drm/i915/uapi: implement object placement extension Add new extension to support setting an immutable-priority-list of potential placements, at creation time. If we use the normal gem_create or gem_create_ext without the extensions/placements then we still get the old behaviour with only placing the object in system memory. v2(Daniel & Jason): - Add a bunch of kernel-doc - Simplify design for placements extension Testcase: igt/gem_create/create-ext-placement-sanity-check Testcase: igt/gem_create/create-ext-placement-each Testcase: igt/gem_create/create-ext-placement-all Signed-off-by: Matthew Auld Signed-off-by: CQ Tang Cc: Joonas Lahtinen Cc: Daniele Ceraolo Spurio Cc: Lionel Landwerlin Cc: Jordan Justen Cc: Daniel Vetter Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-6-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 62 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 9ab609e0cabb..c2c7759b7d2e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2616,6 +2616,11 @@ struct drm_i915_gem_create_ext { * * The (page-aligned) allocated size for the object will be returned. * + * Note that for some devices we have might have further minimum + * page-size restrictions(larger than 4K), like for device local-memory. + * However in general the final size here should always reflect any + * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS + * extension to place the object in device local-memory. */ __u64 size; /** @@ -2636,10 +2641,67 @@ struct drm_i915_gem_create_ext { * If we don't supply any extensions then we get the same old gem_create * behaviour. * + * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see + * struct drm_i915_gem_create_ext_memory_regions. */ +#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 __u64 extensions; }; +/** + * struct drm_i915_gem_create_ext_memory_regions - The + * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension. + * + * Set the object with the desired set of placements/regions in priority + * order. Each entry must be unique and supported by the device. + * + * This is provided as an array of struct drm_i915_gem_memory_class_instance, or + * an equivalent layout of class:instance pair encodings. See struct + * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to + * query the supported regions for a device. + * + * As an example, on discrete devices, if we wish to set the placement as + * device local-memory we can do something like: + * + * .. code-block:: C + * + * struct drm_i915_gem_memory_class_instance region_lmem = { + * .memory_class = I915_MEMORY_CLASS_DEVICE, + * .memory_instance = 0, + * }; + * struct drm_i915_gem_create_ext_memory_regions regions = { + * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, + * .regions = (uintptr_t)®ion_lmem, + * .num_regions = 1, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = 16 * PAGE_SIZE, + * .extensions = (uintptr_t)®ions, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + * + * At which point we get the object handle in &drm_i915_gem_create_ext.handle, + * along with the final object size in &drm_i915_gem_create_ext.size, which + * should account for any rounding up, if required. + */ +struct drm_i915_gem_create_ext_memory_regions { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** @pad: MBZ */ + __u32 pad; + /** @num_regions: Number of elements in the @regions array. */ + __u32 num_regions; + /** + * @regions: The regions/placements array. + * + * An array of struct drm_i915_gem_memory_class_instance. + */ + __u64 regions; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 7b229b13d78d112e2c5d4a60a3c6f602289959fa Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 10 Apr 2021 19:56:05 -0700 Subject: HID: hid-input: add mapping for emoji picker key HUTRR101 added a new usage code for a key that is supposed to invoke and dismiss an emoji picker widget to assist users to locate and enter emojis. This patch adds a new key definition KEY_EMOJI_PICKER and maps 0x0c/0x0d9 usage code to this new keycode. Additionally hid-debug is adjusted to recognize this new usage code as well. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ee93428ced9a..225ec87d4f22 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -611,6 +611,7 @@ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ +#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- cgit v1.2.3 From 285c0faddcebdf360412fc9ef9cde63cf98da7f6 Mon Sep 17 00:00:00 2001 From: Bharat Jauhari Date: Thu, 25 Mar 2021 18:15:40 +0200 Subject: habanalabs: expose ASIC specific PLL index Currently the user cannot interpret the PLL information based on index as its exposed as an integer. This commit exposes ASIC specific PLL indexes and maps it to a generic FW compatible index. Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d3e017b5f0db..6d2d34c9f375 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -239,6 +239,39 @@ enum gaudi_engine_id { GAUDI_ENGINE_ID_SIZE }; +/* + * ASIC specific PLL index + * + * Used to retrieve in frequency info of different IPs via + * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be + * used as an index in struct hl_pll_frequency_info + */ + +enum hl_goya_pll_index { + HL_GOYA_CPU_PLL = 0, + HL_GOYA_IC_PLL, + HL_GOYA_MC_PLL, + HL_GOYA_MME_PLL, + HL_GOYA_PCI_PLL, + HL_GOYA_EMMC_PLL, + HL_GOYA_TPC_PLL, + HL_GOYA_PLL_MAX +}; + +enum hl_gaudi_pll_index { + HL_GAUDI_CPU_PLL = 0, + HL_GAUDI_PCI_PLL, + HL_GAUDI_SRAM_PLL, + HL_GAUDI_HBM_PLL, + HL_GAUDI_NIC_PLL, + HL_GAUDI_DMA_PLL, + HL_GAUDI_MESH_PLL, + HL_GAUDI_MME_PLL, + HL_GAUDI_TPC_PLL, + HL_GAUDI_IF_PLL, + HL_GAUDI_PLL_MAX +}; + enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, -- cgit v1.2.3 From 63c8af5687f6b1b70e9458cac1ffb25e86db1695 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 May 2021 08:48:06 +0900 Subject: block: uapi: fix comment about block device ioctl Fix the comment mentioning ioctl command range used for zoned block devices to reflect the range of commands actually implemented. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210509234806.3000-1-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f44eb0a04afd..4c32e97dcdf0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ -- cgit v1.2.3 From 7ac592aa35a684ff1858fb9ec282886b9e3575ac Mon Sep 17 00:00:00 2001 From: Chris Hyser Date: Wed, 24 Mar 2021 17:40:15 -0400 Subject: sched: prctl() core-scheduling interface This patch provides support for setting and copying core scheduling 'task cookies' between threads (PID), processes (TGID), and process groups (PGID). The value of core scheduling isn't that tasks don't share a core, 'nosmt' can do that. The value lies in exploiting all the sharing opportunities that exist to recover possible lost performance and that requires a degree of flexibility in the API. From a security perspective (and there are others), the thread, process and process group distinction is an existent hierarchal categorization of tasks that reflects many of the security concerns about 'data sharing'. For example, protecting against cache-snooping by a thread that can just read the memory directly isn't all that useful. With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a mechanism to create and share cookies. CREATE/SHARE_TO specify a target pid with enum pidtype used to specify the scope of the targeted tasks. For example, PIDTYPE_TGID will share the cookie with the process and all of it's threads as typically desired in a security scenario. API: prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie) prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL) prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL) prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL) where 'tgtpid/srcpid == 0' implies the current process and pidtype is kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}. For return values, EINVAL, ENOMEM are what they say. ESRCH means the tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission access to tgtpid/srcpid. ENODEV indicates your machines lacks SMT. [peterz: complete rewrite] Signed-off-by: Chris Hyser Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123309.039845339@infradead.org --- include/uapi/linux/prctl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 18a9f59dc067..967d9c55323d 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -259,4 +259,12 @@ struct prctl_mm_map { #define PR_PAC_SET_ENABLED_KEYS 60 #define PR_PAC_GET_ENABLED_KEYS 61 +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From ed5aecd3da2eabd8a6c9f5593df2c4f00985fca2 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 5 May 2021 11:18:54 +0200 Subject: tty: remove broken r3964 line discipline Noone stepped up in the past two years since it was marked as BROKEN by commit c7084edc3f6d (tty: mark Siemens R3964 line discipline as BROKEN). Remove the line discipline for good. Three remarks: * we remove also the uapi header (as noone is able to use that interface anyway) * we do *not* remove the N_R3964 constant definition from tty.h, so it remains reserved. * in_interrupt() check is now removed from vt's con_put_char. Noone else calls tty_operations::put_char from interrupt context. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210505091928.22010-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/n_r3964.h | 99 -------------------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 include/uapi/linux/n_r3964.h (limited to 'include/uapi') diff --git a/include/uapi/linux/n_r3964.h b/include/uapi/linux/n_r3964.h deleted file mode 100644 index 6bbd18520f30..000000000000 --- a/include/uapi/linux/n_r3964.h +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ -/* r3964 linediscipline for linux - * - * ----------------------------------------------------------- - * Copyright by - * Philips Automation Projects - * Kassel (Germany) - * ----------------------------------------------------------- - * This software may be used and distributed according to the terms of - * the GNU General Public License, incorporated herein by reference. - * - * Author: - * L. Haag - * - * $Log: r3964.h,v $ - * Revision 1.4 2005/12/21 19:54:24 Kurt Huwig - * Fixed HZ usage on 2.6 kernels - * Removed unnecessary include - * - * Revision 1.3 2001/03/18 13:02:24 dwmw2 - * Fix timer usage, use spinlocks properly. - * - * Revision 1.2 2001/03/18 12:53:15 dwmw2 - * Merge changes in 2.4.2 - * - * Revision 1.1.1.1 1998/10/13 16:43:14 dwmw2 - * This'll screw the version control - * - * Revision 1.6 1998/09/30 00:40:38 dwmw2 - * Updated to use kernel's N_R3964 if available - * - * Revision 1.4 1998/04/02 20:29:44 lhaag - * select, blocking, ... - * - * Revision 1.3 1998/02/12 18:58:43 root - * fixed some memory leaks - * calculation of checksum characters - * - * Revision 1.2 1998/02/07 13:03:17 root - * ioctl read_telegram - * - * Revision 1.1 1998/02/06 19:19:43 root - * Initial revision - * - * - */ - -#ifndef _UAPI__LINUX_N_R3964_H__ -#define _UAPI__LINUX_N_R3964_H__ - -/* line disciplines for r3964 protocol */ - - -/* - * Ioctl-commands - */ - -#define R3964_ENABLE_SIGNALS 0x5301 -#define R3964_SETPRIORITY 0x5302 -#define R3964_USE_BCC 0x5303 -#define R3964_READ_TELEGRAM 0x5304 - -/* Options for R3964_SETPRIORITY */ -#define R3964_MASTER 0 -#define R3964_SLAVE 1 - -/* Options for R3964_ENABLE_SIGNALS */ -#define R3964_SIG_ACK 0x0001 -#define R3964_SIG_DATA 0x0002 -#define R3964_SIG_ALL 0x000f -#define R3964_SIG_NONE 0x0000 -#define R3964_USE_SIGIO 0x1000 - -/* - * r3964 operation states: - */ - -/* types for msg_id: */ -enum {R3964_MSG_ACK=1, R3964_MSG_DATA }; - -#define R3964_MAX_MSG_COUNT 32 - -/* error codes for client messages */ -#define R3964_OK 0 /* no error. */ -#define R3964_TX_FAIL -1 /* transmission error, block NOT sent */ -#define R3964_OVERFLOW -2 /* msg queue overflow */ - -/* the client gets this struct when calling read(fd,...): */ -struct r3964_client_message { - int msg_id; - int arg; - int error_code; -}; - -#define R3964_MTU 256 - - - -#endif /* _UAPI__LINUX_N_R3964_H__ */ -- cgit v1.2.3 From b7fb0916544de44ce099d9f3b6129c86b484de25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 13 May 2021 15:20:52 +0200 Subject: net: bridge: mcast: add ip4+ip6 mcast router timers to mdb netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have split the multicast router state into two, one for IPv4 and one for IPv6, also add individual timers to the mdb netlink router port dump. Leaving the old timer attribute for backwards compatibility. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 13d59c51ef5b..6b56a7549531 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -627,6 +627,8 @@ enum { MDBA_ROUTER_PATTR_UNSPEC, MDBA_ROUTER_PATTR_TIMER, MDBA_ROUTER_PATTR_TYPE, + MDBA_ROUTER_PATTR_INET_TIMER, + MDBA_ROUTER_PATTR_INET6_TIMER, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) -- cgit v1.2.3 From 08fdced60ca08e34e316a3ab945636fcdfcbc973 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Sat, 15 May 2021 09:15:33 +0200 Subject: ALSA: rawmidi: Add framing mode This commit adds a new framing mode that frames all MIDI data into 32-byte frames with a timestamp. The main benefit is that we can get accurate timestamps even if userspace wakeup and processing is not immediate. Testing on a Celeron N3150 with this mode has a max jitter of 2.8 ms, compared to the in-kernel seq implementation which has a max jitter of 5 ms during idle and much worse when running scheduler stress tests in parallel. Signed-off-by: David Henningsson Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20210515071533.55332-1-coding@diwic.se Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 535a7229e1d9..d17c061950df 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -710,7 +710,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 1) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 2) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -736,12 +736,38 @@ struct snd_rawmidi_info { unsigned char reserved[64]; /* reserved for future use */ }; +#define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) +#define SNDRV_RAWMIDI_MODE_FRAMING_SHIFT 0 +#define SNDRV_RAWMIDI_MODE_FRAMING_NONE (0<<0) +#define SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP (1<<0) +#define SNDRV_RAWMIDI_MODE_CLOCK_MASK (7<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_SHIFT 3 +#define SNDRV_RAWMIDI_MODE_CLOCK_NONE (0<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_REALTIME (1<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC (2<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW (3<<3) + +#define SNDRV_RAWMIDI_FRAMING_DATA_LENGTH 16 + +struct snd_rawmidi_framing_tstamp { + /* For now, frame_type is always 0. Midi 2.0 is expected to add new + * types here. Applications are expected to skip unknown frame types. + */ + __u8 frame_type; + __u8 length; /* number of valid bytes in data field */ + __u8 reserved[2]; + __u32 tv_nsec; /* nanoseconds */ + __u64 tv_sec; /* seconds */ + __u8 data[SNDRV_RAWMIDI_FRAMING_DATA_LENGTH]; +} __packed; + struct snd_rawmidi_params { int stream; size_t buffer_size; /* queue size in bytes */ size_t avail_min; /* minimum avail bytes for wakeup */ unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */ - unsigned char reserved[16]; /* reserved for future use */ + unsigned int mode; /* For input data only, frame incoming data */ + unsigned char reserved[12]; /* reserved for future use */ }; #ifndef __KERNEL__ -- cgit v1.2.3 From add0b32ef9146a8559a60aed54c37692a5f9d34f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 17:06:01 -0500 Subject: siginfo: Move si_trapno inside the union inside _si_fault It turns out that linux uses si_trapno very sparingly, and as such it can be considered extra information for a very narrow selection of signals, rather than information that is present with every fault reported in siginfo. As such move si_trapno inside the union inside of _si_fault. This results in no change in placement, and makes it eaiser to extend _si_fault in the future as this reduces the number of special cases. In particular with si_trapno included in the union it is no longer a concern that the union must be pointer aligned on most architectures because the union follows immediately after si_addr which is a pointer. This change results in a difference in siginfo field placement on sparc and alpha for the fields si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. These architectures do not implement the signals that would use si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. Further these architecture have not yet implemented the userspace that would use si_perf. The point of this change is in fact to correct these placement issues before sparc or alpha grow userspace that cares. This change was discussed[1] and the agreement is that this change is currently safe. [1]: https://lkml.kernel.org/r/CAK8P3a0+uKYwL1NhY6Hvtieghba2hKYGD6hcKx5n8=4Gtt+pHA@mail.gmail.com Acked-by: Marco Elver v1: https://lkml.kernel.org/r/m1tunns7yf.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-5-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/uapi/asm-generic/siginfo.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 03d6f6d2c1fe..e663bf117b46 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -63,9 +63,6 @@ union __sifields { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { void __user *_addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #ifdef __ia64__ int _imm; /* immediate value for "break" */ unsigned int _flags; /* see ia64 si_flags */ @@ -75,6 +72,8 @@ union __sifields { #define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? \ sizeof(short) : __alignof__(void *)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO @@ -150,9 +149,7 @@ typedef struct siginfo { #define si_int _sifields._rt._sigval.sival_int #define si_ptr _sifields._rt._sigval.sival_ptr #define si_addr _sifields._sigfault._addr -#ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno -#endif #define si_addr_lsb _sifields._sigfault._addr_lsb #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper -- cgit v1.2.3 From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 17:28:31 -0500 Subject: signal: Deliver all of the siginfo perf data in _perf Don't abuse si_errno and deliver all of the perf data in _perf member of siginfo_t. Note: The data field in the perf data structures in a u64 to allow a pointer to be encoded without needed to implement a 32bit and 64bit version of the same structure. There already exists a 32bit and 64bit versions siginfo_t, and the 32bit version can not include a 64bit member as it only has 32bit alignment. So unsigned long is used in siginfo_t instead of a u64 as unsigned long can encode a pointer on all architectures linux supports. v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- include/uapi/asm-generic/siginfo.h | 8 ++++++-- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/signalfd.h | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index e663bf117b46..5a3c221f4c9d 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -91,7 +91,10 @@ union __sifields { __u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - unsigned long _perf; + struct { + unsigned long _data; + __u32 _type; + } _perf; }; } _sigfault; @@ -154,7 +157,8 @@ typedef struct siginfo { #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper #define si_pkey _sifields._sigfault._addr_pkey._pkey -#define si_perf _sifields._sigfault._perf +#define si_perf_data _sifields._sigfault._perf._data +#define si_perf_type _sifields._sigfault._perf._type #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd #define si_call_addr _sifields._sigsys._call_addr diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..7b14753b3d38 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 7e333042c7e3..e78dddf433fc 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,8 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 __pad3; - __u64 ssi_perf; + __u32 ssi_perf_type; + __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the -- cgit v1.2.3 From 922e3013046b79b444c87eda5baf43afae1326a8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 May 2021 12:52:43 -0500 Subject: signalfd: Remove SIL_PERF_EVENT fields from signalfd_siginfo With the addition of ssi_perf_data and ssi_perf_type struct signalfd_siginfo is dangerously close to running out of space. All that remains is just enough space for two additional 64bit fields. A practice of adding all possible siginfo_t fields into struct singalfd_siginfo can not be supported as adding the missing fields ssi_lower, ssi_upper, and ssi_pkey would require two 64bit fields and one 32bit fields. In practice the fields ssi_perf_data and ssi_perf_type can never be used by signalfd as the signal that generates them always delivers them synchronously to the thread that triggers them. Therefore until someone actually needs the fields ssi_perf_data and ssi_perf_type in signalfd_siginfo remove them. This leaves a bit more room for future expansion. v1: https://lkml.kernel.org/r/20210503203814.25487-12-ebiederm@xmission.com v2: https://lkml.kernel.org/r/20210505141101.11519-12-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-5-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/signalfd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index e78dddf433fc..83429a05b698 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,6 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 ssi_perf_type; - __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the @@ -51,7 +49,7 @@ struct signalfd_siginfo { * comes out of a read(2) and we really don't want to have * a compat on read(2). */ - __u8 __pad[16]; + __u8 __pad[28]; }; -- cgit v1.2.3 From 79a7f8bdb159d9914b58740f3d31d602a6e4aca8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:03 -0700 Subject: bpf: Introduce bpf_sys_bpf() helper and program type. Add placeholders for bpf_sys_bpf() helper and new program type. Make sure to check that expected_attach_type is zero for future extensibility. Allow tracing helper functions to be used in this program type, since they will only execute from user context via bpf_prog_test_run. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-2-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ec6d85a81744..c92648f38144 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -937,6 +937,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ }; enum bpf_attach_type { @@ -4735,6 +4736,12 @@ union bpf_attr { * be zero-terminated except when **str_size** is 0. * * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4903,6 +4910,7 @@ union bpf_attr { FN(check_mtu), \ FN(for_each_map_elem), \ FN(snprintf), \ + FN(sys_bpf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 387544bfa291a22383d60b40f887360e2b931ec6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:10 -0700 Subject: bpf: Introduce fd_idx Typical program loading sequence involves creating bpf maps and applying map FDs into bpf instructions in various places in the bpf program. This job is done by libbpf that is using compiler generated ELF relocations to patch certain instruction after maps are created and BTFs are loaded. The goal of fd_idx is to allow bpf instructions to stay immutable after compilation. At load time the libbpf would still create maps as usual, but it wouldn't need to patch instructions. It would store map_fds into __u32 fd_array[] and would pass that pointer to sys_bpf(BPF_PROG_LOAD). Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-9-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c92648f38144..de58a714ed36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1098,8 +1098,8 @@ enum bpf_link_type { /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD - * insn[0].imm: map fd + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx * insn[1].imm: 0 * insn[0].off: 0 * insn[1].off: 0 @@ -1107,15 +1107,19 @@ enum bpf_link_type { * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx * insn[1].imm: offset into value * insn[0].off: 0 * insn[1].off: 0 * ldimm64 rewrite: address of map[0]+offset * verifier type: PTR_TO_MAP_VALUE */ -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 @@ -1315,6 +1319,8 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; + __u32 :32; /* pad */ + __aligned_u64 fd_array; /* array of FDs */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 3d78417b60fba249cc555468cb72d96f5cde2964 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:11 -0700 Subject: bpf: Add bpf_btf_find_by_name_kind() helper. Add new helper: long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) Description Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. Return Returns btf_id and btf_obj_fd in lower and upper 32 bits. It will be used by loader program to find btf_id to attach the program to and to find btf_ids of ksyms. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-10-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index de58a714ed36..3cc07351c1cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4748,6 +4748,12 @@ union bpf_attr { * Execute bpf syscall with given arguments. * Return * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4917,6 +4923,7 @@ union bpf_attr { FN(for_each_map_elem), \ FN(snprintf), \ FN(sys_bpf), \ + FN(btf_find_by_name_kind), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3abea089246f76c1517b054ddb5946f3f1dbd2c0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 May 2021 17:36:12 -0700 Subject: bpf: Add bpf_sys_close() helper. Add bpf_sys_close() helper to be used by the syscall/loader program to close intermediate FDs and other cleanup. Note this helper must never be allowed inside fdget/fdput bracketing. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210514003623.28033-11-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3cc07351c1cf..4cd9a0181f27 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4754,6 +4754,12 @@ union bpf_attr { * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. * Return * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4924,6 +4930,7 @@ union bpf_attr { FN(snprintf), \ FN(sys_bpf), \ FN(btf_find_by_name_kind), \ + FN(sys_close), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 7cd60e43a6def40ecb75deb8decc677995970d0b Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Tue, 18 May 2021 13:03:15 -0700 Subject: uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ Define AT_MINSIGSTKSZ in the generic uapi header. It is already used as generic ABI in glibc's generic elf.h, and this define will prevent future namespace conflicts. In particular, x86 is also using this generic definition. Signed-off-by: Chang S. Bae Signed-off-by: Borislav Petkov Reviewed-by: Len Brown Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210518200320.17239-2-chang.seok.bae@intel.com --- include/uapi/linux/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h index abe5f2b6581b..c7e502bf5a6f 100644 --- a/include/uapi/linux/auxvec.h +++ b/include/uapi/linux/auxvec.h @@ -33,5 +33,8 @@ #define AT_EXECFN 31 /* filename of program */ +#ifndef AT_MINSIGSTKSZ +#define AT_MINSIGSTKSZ 51 /* minimal stack size for signal delivery */ +#endif #endif /* _UAPI_LINUX_AUXVEC_H */ -- cgit v1.2.3 From 5d67f349590ddc94b6d4e25f19085728db9de697 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 May 2021 18:40:32 -0700 Subject: bpf: Add cmd alias BPF_PROG_RUN Add BPF_PROG_RUN command as an alias to BPF_RPOG_TEST_RUN to better indicate the full range of use cases done by the command. Suggested-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210519014032.20908-1-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4cd9a0181f27..418b9b813d65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -837,6 +837,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, -- cgit v1.2.3 From c906b86e9c44946fae505a5996b1f4c24a699fe6 Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Mon, 10 May 2021 13:23:33 +0300 Subject: RDMA/mlx5: Add SQD2RTS bit to the alloc ucontext response The new bit in the comp_mask is needed to mark that kernel supports SQD2RTS transition for the modify QP command. Link: https://lore.kernel.org/r/7ce705fedac1b2b8e3a2f4013e04244dc5946344.1620641808.git.leonro@nvidia.com Reviewed-by: Evgenii Kochetov Signed-off-by: Sergey Gorenko Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 27905a0268c9..995faf8f44bd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -101,6 +101,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3, }; enum mlx5_user_cmds_supp_uhw { -- cgit v1.2.3 From 12ccb76280f8c0c07794fa68f83286b934981ca5 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 13 Apr 2021 11:40:17 +0200 Subject: media: lirc: remove out of date comment This file has been updated many times since 2010. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index c45a4eaea667..9919f2062b14 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * lirc.h - linux infrared remote control header file - * last modified 2010/07/13 by Jarod Wilson */ #ifndef _LINUX_LIRC_H -- cgit v1.2.3 From b453e42a6e8b9fa4580011e923963248c56b9d4d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 5 May 2021 15:18:53 -0400 Subject: drm/amdgpu: Add new placement for preemptible SG BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SG BOs such as dmabuf imports and userptr BOs do not consume system resources directly. Instead they point to resources owned elsewhere. They typically get evicted by DMABuf move notifiers of MMU notifiers. If those notifiers don't need to wait for hardware fences (i.e. the SG BOs are used in a preemptible context), then we don't need to limit them to the GTT size and we don't need TTM to evict them. Create a new placement for such preemptible SG BOs that does not impose artificial size limits and TTM evictions. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 2063a1c10f79..dc61537d7547 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -136,6 +136,10 @@ extern "C" { * accessing it with various hw blocks */ #define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From 2f0968827a48a3b01a0cc9185abd41978d5ce918 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 29 Apr 2021 16:48:16 +0200 Subject: media: uapi: Move the MPEG-2 stateless control type out of staging Move the MPEG-2 stateless control types out of staging, and re-number it to avoid any confusion. Signed-off-by: Ezequiel Garcia Tested-by: Jernej Skrabec Reviewed-by: Jernej Skrabec Tested-by: Daniel Almeida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 311a01cc5775..d3bb18a3a51b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1807,6 +1807,10 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_FWHT_PARAMS = 0x0220, V4L2_CTRL_TYPE_VP8_FRAME = 0x0240, + + V4L2_CTRL_TYPE_MPEG2_QUANTISATION = 0x0250, + V4L2_CTRL_TYPE_MPEG2_SEQUENCE = 0x0251, + V4L2_CTRL_TYPE_MPEG2_PICTURE = 0x0252, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From f4815b399111d992c1118c708f464a847dfd29e2 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 29 Apr 2021 16:48:18 +0200 Subject: media: uapi: move MPEG-2 stateless controls out of staging Until now, the MPEG-2 V4L2 API was not exported as a public API, and only defined in a private media header (media/mpeg2-ctrls.h). After reviewing the MPEG-2 specification in detail, and reworking the controls so they match the MPEG-2 semantics properly, we can consider it ready. Signed-off-by: Ezequiel Garcia Tested-by: Jernej Skrabec Reviewed-by: Jernej Skrabec Tested-by: Daniel Almeida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 112 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 3 + 2 files changed, 115 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index d43bec5f1afd..f96bea19c991 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1862,6 +1862,118 @@ struct v4l2_ctrl_vp8_frame { __u64 flags; }; +/* Stateless MPEG-2 controls */ + +#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE 0x01 + +#define V4L2_CID_STATELESS_MPEG2_SEQUENCE (V4L2_CID_CODEC_STATELESS_BASE+220) +/** + * struct v4l2_ctrl_mpeg2_sequence - MPEG-2 sequence header + * + * All the members on this structure match the sequence header and sequence + * extension syntaxes as specified by the MPEG-2 specification. + * + * Fields horizontal_size, vertical_size and vbv_buffer_size are a + * combination of respective _value and extension syntax elements, + * as described in section 6.3.3 "Sequence header". + * + * @horizontal_size: combination of elements horizontal_size_value and + * horizontal_size_extension. + * @vertical_size: combination of elements vertical_size_value and + * vertical_size_extension. + * @vbv_buffer_size: combination of elements vbv_buffer_size_value and + * vbv_buffer_size_extension. + * @profile_and_level_indication: see MPEG-2 specification. + * @chroma_format: see MPEG-2 specification. + * @flags: see V4L2_MPEG2_SEQ_FLAG_{}. + */ +struct v4l2_ctrl_mpeg2_sequence { + __u16 horizontal_size; + __u16 vertical_size; + __u32 vbv_buffer_size; + __u16 profile_and_level_indication; + __u8 chroma_format; + __u8 flags; +}; + +#define V4L2_MPEG2_PIC_CODING_TYPE_I 1 +#define V4L2_MPEG2_PIC_CODING_TYPE_P 2 +#define V4L2_MPEG2_PIC_CODING_TYPE_B 3 +#define V4L2_MPEG2_PIC_CODING_TYPE_D 4 + +#define V4L2_MPEG2_PIC_TOP_FIELD 0x1 +#define V4L2_MPEG2_PIC_BOTTOM_FIELD 0x2 +#define V4L2_MPEG2_PIC_FRAME 0x3 + +#define V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST 0x0001 +#define V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT 0x0002 +#define V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV 0x0004 +#define V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE 0x0008 +#define V4L2_MPEG2_PIC_FLAG_INTRA_VLC 0x0010 +#define V4L2_MPEG2_PIC_FLAG_ALT_SCAN 0x0020 +#define V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST 0x0040 +#define V4L2_MPEG2_PIC_FLAG_PROGRESSIVE 0x0080 + +#define V4L2_CID_STATELESS_MPEG2_PICTURE (V4L2_CID_CODEC_STATELESS_BASE+221) +/** + * struct v4l2_ctrl_mpeg2_picture - MPEG-2 picture header + * + * All the members on this structure match the picture header and picture + * coding extension syntaxes as specified by the MPEG-2 specification. + * + * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as + * reference for backward prediction. + * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as + * reference for forward prediction. These timestamp refers to the + * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns() + * to convert the struct timeval to a __u64. + * @flags: see V4L2_MPEG2_PIC_FLAG_{}. + * @f_code: see MPEG-2 specification. + * @picture_coding_type: see MPEG-2 specification. + * @picture_structure: see V4L2_MPEG2_PIC_{}_FIELD. + * @intra_dc_precision: see MPEG-2 specification. + * @reserved: padding field. Should be zeroed by applications. + */ +struct v4l2_ctrl_mpeg2_picture { + __u64 backward_ref_ts; + __u64 forward_ref_ts; + __u32 flags; + __u8 f_code[2][2]; + __u8 picture_coding_type; + __u8 picture_structure; + __u8 intra_dc_precision; + __u8 reserved[5]; +}; + +#define V4L2_CID_STATELESS_MPEG2_QUANTISATION (V4L2_CID_CODEC_STATELESS_BASE+222) +/** + * struct v4l2_ctrl_mpeg2_quantisation - MPEG-2 quantisation + * + * Quantisation matrices as specified by section 6.3.7 + * "Quant matrix extension". + * + * @intra_quantiser_matrix: The quantisation matrix coefficients + * for intra-coded frames, in zigzag scanning order. It is relevant + * for both luma and chroma components, although it can be superseded + * by the chroma-specific matrix for non-4:2:0 YUV formats. + * @non_intra_quantiser_matrix: The quantisation matrix coefficients + * for non-intra-coded frames, in zigzag scanning order. It is relevant + * for both luma and chroma components, although it can be superseded + * by the chroma-specific matrix for non-4:2:0 YUV formats. + * @chroma_intra_quantiser_matrix: The quantisation matrix coefficients + * for the chominance component of intra-coded frames, in zigzag scanning + * order. Only relevant for 4:2:2 and 4:4:4 YUV formats. + * @chroma_non_intra_quantiser_matrix: The quantisation matrix coefficients + * for the chrominance component of non-intra-coded frames, in zigzag scanning + * order. Only relevant for 4:2:2 and 4:4:4 YUV formats. + */ +struct v4l2_ctrl_mpeg2_quantisation { + __u8 intra_quantiser_matrix[64]; + __u8 non_intra_quantiser_matrix[64]; + __u8 chroma_intra_quantiser_matrix[64]; + __u8 chroma_non_intra_quantiser_matrix[64]; +}; + #define V4L2_CID_COLORIMETRY_CLASS_BASE (V4L2_CTRL_CLASS_COLORIMETRY | 0x900) #define V4L2_CID_COLORIMETRY_CLASS (V4L2_CTRL_CLASS_COLORIMETRY | 1) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index d3bb18a3a51b..9260791b8438 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1747,6 +1747,9 @@ struct v4l2_ext_control { struct v4l2_ctrl_h264_decode_params __user *p_h264_decode_params; struct v4l2_ctrl_fwht_params __user *p_fwht_params; struct v4l2_ctrl_vp8_frame __user *p_vp8_frame; + struct v4l2_ctrl_mpeg2_sequence __user *p_mpeg2_sequence; + struct v4l2_ctrl_mpeg2_picture __user *p_mpeg2_picture; + struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation; void __user *ptr; }; } __attribute__ ((packed)); -- cgit v1.2.3 From 3e87f192b405960c0fe83e0925bd0dadf4f8cf43 Mon Sep 17 00:00:00 2001 From: Denis Salopek Date: Tue, 11 May 2021 23:00:04 +0200 Subject: bpf: Add lookup_and_delete_elem support to hashtab Extend the existing bpf_map_lookup_and_delete_elem() functionality to hashtab map types, in addition to stacks and queues. Create a new hashtab bpf_map_ops function that does lookup and deletion of the element under the same bucket lock and add the created map_ops to bpf.h. Signed-off-by: Denis Salopek Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/4d18480a3e990ffbf14751ddef0325eed3be2966.1620763117.git.denis.salopek@sartura.hr --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 418b9b813d65..562adeac1d67 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -527,6 +527,15 @@ union bpf_iter_link_info { * Look up an element with the given *key* in the map referred to * by the file descriptor *fd*, and if found, delete the element. * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types * implement this command as a "pop" operation, deleting the top * element rather than one corresponding to *key*. @@ -536,6 +545,10 @@ union bpf_iter_link_info { * This command is only valid for the following map types: * * **BPF_MAP_TYPE_QUEUE** * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** * * Return * Returns zero on success. On error, -1 is returned and *errno* -- cgit v1.2.3 From e624d4ed4aa8cc3c69d1359b0aaea539203ed266 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 19 May 2021 17:07:45 +0800 Subject: xdp: Extend xdp_redirect_map with broadcast support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two flags BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS to extend xdp_redirect_map for broadcast support. With BPF_F_BROADCAST the packet will be broadcasted to all the interfaces in the map. with BPF_F_EXCLUDE_INGRESS the ingress interface will be excluded when do broadcasting. When getting the devices in dev hash map via dev_map_hash_get_next_key(), there is a possibility that we fall back to the first key when a device was removed. This will duplicate packets on some interfaces. So just walk the whole buckets to avoid this issue. For dev array map, we also walk the whole map to find valid interfaces. Function bpf_clear_redirect_map() was removed in commit ee75aef23afe ("bpf, xdp: Restructure redirect actions"). Add it back as we need to use ri->map again. With test topology: +-------------------+ +-------------------+ | Host A (i40e 10G) | ---------- | eno1(i40e 10G) | +-------------------+ | | | Host B | +-------------------+ | | | Host C (i40e 10G) | ---------- | eno2(i40e 10G) | +-------------------+ | | | +------+ | | veth0 -- | Peer | | | veth1 -- | | | | veth2 -- | NS | | | +------+ | +-------------------+ On Host A: # pktgen/pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -s 64 On Host B(Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 128G Memory): Use xdp_redirect_map and xdp_redirect_map_multi in samples/bpf for testing. All the veth peers in the NS have a XDP_DROP program loaded. The forward_map max_entries in xdp_redirect_map_multi is modify to 4. Testing the performance impact on the regular xdp_redirect path with and without patch (to check impact of additional check for broadcast mode): 5.12 rc4 | redirect_map i40e->i40e | 2.0M | 9.7M 5.12 rc4 | redirect_map i40e->veth | 1.7M | 11.8M 5.12 rc4 + patch | redirect_map i40e->i40e | 2.0M | 9.6M 5.12 rc4 + patch | redirect_map i40e->veth | 1.7M | 11.7M Testing the performance when cloning packets with the redirect_map_multi test, using a redirect map size of 4, filled with 1-3 devices: 5.12 rc4 + patch | redirect_map multi i40e->veth (x1) | 1.7M | 11.4M 5.12 rc4 + patch | redirect_map multi i40e->veth (x2) | 1.1M | 4.3M 5.12 rc4 + patch | redirect_map multi i40e->veth (x3) | 0.8M | 2.6M Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Acked-by: Martin KaFai Lau Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20210519090747.1655268-3-liuhangbin@gmail.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 562adeac1d67..2c1ba70abbf1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2555,8 +2555,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcasted to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when do broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. @@ -5122,6 +5126,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ -- cgit v1.2.3 From 7e97d274db920df479e222fed10e7b242f90ffb0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 17 May 2021 13:24:25 +0200 Subject: can: uapi: update CAN-FD frame description Since an early version of the CAN-FD specification the bit that defines a CAN-FD frame on the wire, has been renamed from Extended Data Length (EDL) to FD Frame (FDF). To avoid confusion, update the struct canfd_frame description in the UAPI headers accordingly. Link: https://lore.kernel.org/r/20210517113727.77597-1-mkl@pengutronix.de Suggested-by: Ayoub Kaanich Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index c7535352fef6..ac5d7a31671f 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -123,8 +123,8 @@ struct can_frame { /* * defined bits for canfd_frame.flags * - * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to - * be set in the CAN frame bitstream on the wire. The EDL bit switch turns + * The use of struct canfd_frame implies the FD Frame (FDF) bit to + * be set in the CAN frame bitstream on the wire. The FDF bit switch turns * the CAN controllers bitstream processor into the CAN FD mode which creates * two new options within the CAN FD frame specification: * -- cgit v1.2.3 From 02546884221279da2725e87e35348290470363d7 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 11 Apr 2017 15:43:43 +0200 Subject: can: uapi: introduce CANFD_FDF flag for mixed content in struct canfd_frame The struct can_frame and struct canfd_frame intentionally share the same layout to be able to write CAN frame content into a CAN FD frame structure. When this is done the former differentiation via CAN_MTU / CANFD_MTU is lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of using struct canfd_frame for mixed CAN/CAN FD content (dual use). N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. Link: https://lore.kernel.org/r/20170411134343.3089-1-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index ac5d7a31671f..90801ada2bbe 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -135,9 +135,18 @@ struct can_frame { * controller only the CANFD_BRS bit is relevant for real CAN controllers when * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make * sense for virtual CAN interfaces to test applications with echoed frames. + * + * The struct can_frame and struct canfd_frame intentionally share the same + * layout to be able to write CAN frame content into a CAN FD frame structure. + * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets + * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of + * using struct canfd_frame for mixed CAN / CAN FD content (dual use). + * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of + * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. */ #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ +#define CANFD_FDF 0x04 /* mark CAN FD for dual use of struct canfd_frame */ /** * struct canfd_frame - CAN flexible data rate frame structure -- cgit v1.2.3 From fb1070d18edb37daf3979662975bc54625a19953 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Fri, 21 May 2021 01:58:43 -0700 Subject: KVM: X86: Use _BITUL() macro in UAPI headers Replace BIT() in KVM's UPAI header with _BITUL(). BIT() is not defined in the UAPI headers and its usage may cause userspace build errors. Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking") Signed-off-by: Joe Richey Message-Id: <20210521085849.37676-3-joerichey94@gmail.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3fd9a7e9d90c..79d9c44d1ad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -8,6 +8,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ +#include #include #include #include @@ -1879,8 +1880,8 @@ struct kvm_hyperv_eventfd { * conversion after harvesting an entry. Also, it must not skip any * dirty bits, so that dirty bits are always harvested in sequence. */ -#define KVM_DIRTY_GFN_F_DIRTY BIT(0) -#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) #define KVM_DIRTY_GFN_F_MASK 0x3 /* -- cgit v1.2.3 From ff92ecf575a9293afcf189c69e84f68b6595b77a Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 19 Mar 2021 22:03:13 +0100 Subject: drm/fourcc: Add 16 bpc fixed point framebuffer formats. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are 16 bits per color channel unsigned normalized formats. They are supported by at least AMD display hw, and suitable for direct scanout of Vulkan swapchain images in the format VK_FORMAT_R16G16B16A16_UNORM. Reviewed-by: Ville Syrjälä Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index f76de49c768f..f7156322aba5 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -168,6 +168,13 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* 64 bpp RGB */ +#define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616 fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* * Floating point 64bpp RGB * IEEE 754-2008 binary16 half-precision float -- cgit v1.2.3 From 133dc203d77dff617d9c4673973ef3859be2c476 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 4 May 2021 17:54:06 +0200 Subject: netfilter: nft_exthdr: Support SCTP chunks Chunks are SCTP header extensions similar in implementation to IPv6 extension headers or TCP options. Reusing exthdr expression to find and extract field values from them is therefore pretty straightforward. For now, this supports extracting data from chunks at a fixed offset (and length) only - chunks themselves are an extensible data structure; in order to make all fields available, a nested extension search is needed. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1fb4ca18ffbb..19715e2679d1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -813,11 +813,13 @@ enum nft_exthdr_flags { * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers * @NFT_EXTHDR_OP_TCP: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options + * @NFT_EXTHDR_OP_SCTP: match against sctp chunks */ enum nft_exthdr_op { NFT_EXTHDR_OP_IPV6, NFT_EXTHDR_OP_TCPOPT, NFT_EXTHDR_OP_IPV4, + NFT_EXTHDR_OP_SCTP, __NFT_EXTHDR_OP_MAX }; #define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) -- cgit v1.2.3 From 88938bf343efbc4d31677a91a0ed1d189be1e7cb Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 18 May 2021 11:51:18 +0200 Subject: drm: reference mode flags in DRM_CLIENT_CAP_* docs In the docs for DRM_CLIENT_CAP_STEREO_3D and DRM_CLIENT_CAP_ASPECT_RATIO, reference the DRM_MODE_FLAG_* defines that get set when the cap is enabled. Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Daniel Stone Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/434201/ --- include/uapi/drm/drm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 67b94bc3c885..1c947227f72b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -777,9 +777,9 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_STEREO_3D * - * if set to 1, the DRM core will expose the stereo 3D capabilities of the + * If set to 1, the DRM core will expose the stereo 3D capabilities of the * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. */ #define DRM_CLIENT_CAP_STEREO_3D 1 @@ -804,6 +804,7 @@ struct drm_get_cap { * DRM_CLIENT_CAP_ASPECT_RATIO * * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. */ #define DRM_CLIENT_CAP_ASPECT_RATIO 4 -- cgit v1.2.3 From bbf4627ba6415711da94f8106a7de993c49372a6 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 18 May 2021 11:56:54 +0200 Subject: drm: clarify and linkify DRM_CLIENT_CAP_WRITEBACK_CONNECTORS docs Make it clear that the client is responsible for enabling ATOMIC prior to enabling WRITEBACK_CONNECTORS. Linkify the reference to ATOMIC. Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Daniel Stone Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/434200/ --- include/uapi/drm/drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 1c947227f72b..87878aea4526 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -812,8 +812,8 @@ struct drm_get_cap { * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS * * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. Depends on client - * also supporting DRM_CLIENT_CAP_ATOMIC + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -- cgit v1.2.3 From 2e290c8d8d29278b9a20e2765ab8f6df02f2e707 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 18 May 2021 13:11:31 +0200 Subject: drm: document minimum kernel version for DRM_CLIENT_CAP_* The kernel versions including the following commits are referenced: DRM_CLIENT_CAP_STEREO_3D 61d8e3282541 ("drm: Add a STEREO_3D capability to the SET_CLIENT_CAP ioctl") DRM_CLIENT_CAP_UNIVERSAL_PLANES 681e7ec73044 ("drm: Allow userspace to ask for universal plane list (v2)") c7dbc6c9ae5c ("drm: Remove command line guard for universal planes") DRM_CLIENT_CAP_ATOMIC 88a48e297b3a ("drm: add atomic properties") 8b72ce158cf0 ("drm: Always enable atomic API") DRM_CLIENT_CAP_ASPECT_RATIO 7595bda2fb43 ("drm: Add DRM client cap for aspect-ratio") DRM_CLIENT_CAP_WRITEBACK_CONNECTORS d67b6a206507 ("drm: writeback: Add client capability for exposing writeback connectors") Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Daniel Stone Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/434202/ --- include/uapi/drm/drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 87878aea4526..d043752a74cf 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -780,6 +780,9 @@ struct drm_get_cap { * If set to 1, the DRM core will expose the stereo 3D capabilities of the * monitor by advertising the supported 3D layouts in the flags of struct * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. */ #define DRM_CLIENT_CAP_STEREO_3D 1 @@ -788,6 +791,9 @@ struct drm_get_cap { * * If set to 1, the DRM core will expose all planes (overlay, primary, and * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. */ #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 @@ -797,6 +803,13 @@ struct drm_get_cap { * If set to 1, the DRM core will expose atomic properties to userspace. This * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -805,6 +818,9 @@ struct drm_get_cap { * * If set to 1, the DRM core will provide aspect ratio information in modes. * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. */ #define DRM_CLIENT_CAP_ASPECT_RATIO 4 @@ -814,6 +830,9 @@ struct drm_get_cap { * If set to 1, the DRM core will expose special connectors to be used for * writing back to memory the scene setup in the commit. The client must enable * &DRM_CLIENT_CAP_ATOMIC first. + * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -- cgit v1.2.3 From dd8b477f9a3d8edb136207acb3652e1a34a661b7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 1 Jun 2021 11:33:59 +0200 Subject: mount: Support "nosymfollow" in new mount api Commit dab741e0e02b ("Add a "nosymfollow" mount option.") added support for the "nosymfollow" mount option allowing to block following symlinks when resolving paths. The mount option so far was only available in the old mount api. Make it available in the new mount api as well. Bonus is that it can be applied to a whole subtree not just a single mount. Cc: Christoph Hellwig Cc: Mattias Nissler Cc: Aleksa Sarai Cc: Al Viro Cc: Ross Zwisler Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index e6524ead2b7b..dd7a166fdf9c 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -120,6 +120,7 @@ enum fsconfig_command { #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ #define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ /* * mount_setattr() -- cgit v1.2.3 From e1d9a90a9bfdb0735062d3adb16b07314b4b7b01 Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:35 +0530 Subject: net: ethernet: rmnet: Support for ingress MAPv5 checksum offload Adding support for processing of MAPv5 downlink packets. It involves parsing the Mapv5 packet and checking the csum header to know whether the hardware has validated the checksum and is valid or not. Based on the checksum valid bit the corresponding stats are incremented and skb->ip_summed is marked either CHECKSUM_UNNECESSARY or left as CHEKSUM_NONE to let network stack revalidate the checksum and update the respective snmp stats. Current MAPV1 header has been modified, the reserved field in the Mapv1 header is now used for next header indication. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cd5b382a4138..1f753dcd85e1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1236,6 +1236,7 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) enum { IFLA_RMNET_UNSPEC, -- cgit v1.2.3 From b6e5d27e32ef6089d316ce7e1ecaf595584d4b84 Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:36 +0530 Subject: net: ethernet: rmnet: Add support for MAPv5 egress packets Adding support for MAPv5 egress packets. This involves adding the MAPv5 header and setting the csum_valid_required in the checksum header to request HW compute the checksum. Corresponding stats are incremented based on whether the checksum is computed in software or HW. New stat has been added which represents the count of packets whose checksum is calculated by the HW. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1f753dcd85e1..a5a7f0e64865 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1237,6 +1237,7 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, -- cgit v1.2.3 From 29b4c589b43d8dc0c0a5342cd2ac5da6ec1116b5 Mon Sep 17 00:00:00 2001 From: Jiawei Gu Date: Wed, 14 Apr 2021 16:44:36 +0800 Subject: drm/amdgpu: Add vbios info ioctl interface Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info. Provides a way for the user application to get the VBIOS information without having to parse the binary. It is useful for the user to be able to display in a simple way the VBIOS version in their system if they happen to encounter an issue. V2: Use numeric serial. Parse and expose vbios version string. V3: Remove redundant data in drm_amdgpu_info_vbios struct. V4: 64 bit alignment in drm_amdgpu_info_vbios. v5: squash together all the reverts, etc. (Alex) Signed-off-by: Jiawei Gu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index dc61537d7547..91a23187902e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -757,6 +757,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_VBIOS_SIZE 0x1 /* Subquery id: Query vbios image */ #define AMDGPU_INFO_VBIOS_IMAGE 0x2 + /* Subquery id: Query vbios info */ + #define AMDGPU_INFO_VBIOS_INFO 0x3 /* Query UVD handles */ #define AMDGPU_INFO_NUM_HANDLES 0x1C /* Query sensor related information */ @@ -950,6 +952,15 @@ struct drm_amdgpu_info_firmware { __u32 feature; }; +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + #define AMDGPU_VRAM_TYPE_UNKNOWN 0 #define AMDGPU_VRAM_TYPE_GDDR1 1 #define AMDGPU_VRAM_TYPE_DDR2 2 -- cgit v1.2.3 From d170ebb00472268410dce80ae4834c98e79315da Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 21 May 2021 10:45:44 +0200 Subject: media: uapi/linux/cec-funcs.h: set delay to 1 if unnused If the audio_out_delay value is unused, then set it to 1, not 0. The value 0 is reserved, and 1 is a much safer value since it translates to a delay of (1 - 1) * 2 = 0 ms. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec-funcs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index 37590027b604..c3baaea0b8ef 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -1665,7 +1665,7 @@ static inline void cec_ops_report_current_latency(const struct cec_msg *msg, if (*audio_out_compensated == 3 && msg->len >= 7) *audio_out_delay = msg->msg[6]; else - *audio_out_delay = 0; + *audio_out_delay = 1; } static inline void cec_msg_request_current_latency(struct cec_msg *msg, -- cgit v1.2.3 From ce67eaca95f8ab5c6aae41a10adfe9a6e8efa58c Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Fri, 21 May 2021 10:58:46 +0200 Subject: media: vicodec: Use _BITUL() macro in UAPI headers Replace BIT() in v4l2's UPAI header with _BITUL(). BIT() is not defined in the UAPI headers and its usage may cause userspace build errors. Fixes: 206bc0f6fb94 ("media: vicodec: mark the stateless FWHT API as stable") Signed-off-by: Joe Richey Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index f96bea19c991..fdf97a6d7d18 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -50,6 +50,7 @@ #ifndef __LINUX_V4L2_CONTROLS_H #define __LINUX_V4L2_CONTROLS_H +#include #include /* Control classes */ @@ -1602,30 +1603,30 @@ struct v4l2_ctrl_h264_decode_params { #define V4L2_FWHT_VERSION 3 /* Set if this is an interlaced format */ -#define V4L2_FWHT_FL_IS_INTERLACED BIT(0) +#define V4L2_FWHT_FL_IS_INTERLACED _BITUL(0) /* Set if this is a bottom-first (NTSC) interlaced format */ -#define V4L2_FWHT_FL_IS_BOTTOM_FIRST BIT(1) +#define V4L2_FWHT_FL_IS_BOTTOM_FIRST _BITUL(1) /* Set if each 'frame' contains just one field */ -#define V4L2_FWHT_FL_IS_ALTERNATE BIT(2) +#define V4L2_FWHT_FL_IS_ALTERNATE _BITUL(2) /* * If V4L2_FWHT_FL_IS_ALTERNATE was set, then this is set if this * 'frame' is the bottom field, else it is the top field. */ -#define V4L2_FWHT_FL_IS_BOTTOM_FIELD BIT(3) +#define V4L2_FWHT_FL_IS_BOTTOM_FIELD _BITUL(3) /* Set if the Y' plane is uncompressed */ -#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED BIT(4) +#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED _BITUL(4) /* Set if the Cb plane is uncompressed */ -#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED BIT(5) +#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED _BITUL(5) /* Set if the Cr plane is uncompressed */ -#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED BIT(6) +#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED _BITUL(6) /* Set if the chroma plane is full height, if cleared it is half height */ -#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT BIT(7) +#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT _BITUL(7) /* Set if the chroma plane is full width, if cleared it is half width */ -#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH BIT(8) +#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH _BITUL(8) /* Set if the alpha plane is uncompressed */ -#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED BIT(9) +#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED _BITUL(9) /* Set if this is an I Frame */ -#define V4L2_FWHT_FL_I_FRAME BIT(10) +#define V4L2_FWHT_FL_I_FRAME _BITUL(10) /* A 4-values flag - the number of components - 1 */ #define V4L2_FWHT_FL_COMPONENTS_NUM_MSK GENMASK(18, 16) -- cgit v1.2.3 From 4677efc486e1872f62d4632c50f7183f82296fa6 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:19 +0300 Subject: devlink: Introduce rate object Allow registering rate object for devlink ports with dedicated devlink_rate_leaf_{create|destroy}() API. Implement new netlink DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info. Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for notifications when creating/deleting leaf rate object. Rate API is intended to be used for rate limiting of individual devlink ports (leafs) and their aggregates (nodes). Example: $ devlink port show pci/0000:03:00.0/0 pci/0000:03:00.0/1 $ devlink port function rate show pci/0000:03:00.0/0: type leaf pci/0000:03:00.0/1: type leaf Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f6008b2fa60f..0c27b45c47db 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -126,6 +126,11 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_TEST, + DEVLINK_CMD_RATE_GET, /* can dump */ + DEVLINK_CMD_RATE_SET, + DEVLINK_CMD_RATE_NEW, + DEVLINK_CMD_RATE_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -206,6 +211,10 @@ enum devlink_port_flavour { */ }; +enum devlink_rate_type { + DEVLINK_RATE_TYPE_LEAF, +}; + enum devlink_param_cmode { DEVLINK_PARAM_CMODE_RUNTIME, DEVLINK_PARAM_CMODE_DRIVERINIT, @@ -534,6 +543,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ + + DEVLINK_ATTR_RATE_TYPE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 1897db2ec3109eb1dd07b357c95c5e03d54e41b9 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:22 +0300 Subject: devlink: Allow setting tx rate for devlink rate leaf objects Implement support for DEVLINK_CMD_RATE_SET command with new attributes DEVLINK_ATTR_RATE_TX_{SHARE|MAX} that are used to set devlink rate shared/max tx rate values. Extend devlink ops with new callbacks rate_leaf_tx_{share|max}_set() to allow supporting drivers to implement rate control through devlink. New attributes are optional. Driver implementations are allowed to support either or both of them. Shared rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_share 10mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_share 10mbit Max rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_max 100mbit Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0c27b45c47db..ae94cd2a1078 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -545,6 +545,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ DEVLINK_ATTR_RATE_TYPE, /* u16 */ + DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ + DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a8ecb93ef03de4c59fb6289f99bc9616a852c917 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:25 +0300 Subject: devlink: Introduce rate nodes Implement support for DEVLINK_CMD_RATE_{NEW|DEL} commands that are used to create and delete devlink rate nodes. Add new attribute DEVLINK_ATTR_RATE_NODE_NAME that specify node name string. The node name is an alphanumeric identifier. No valid node name can be a devlink port index, eg. decimal number. Extend devlink ops with new callbacks rate_node_{new|del}() and rate_node_tx_{share|max}_set() to allow supporting drivers to implement ports rate grouping and setting tx rate of rate nodes through devlink. Expose devlink_rate_nodes_destroy() function to allow vendor driver do proper cleanup of internally allocated resources for the nodes if the driver goes down or due to any other reasons which requires nodes to be destroyed. Disallow moving device from switchdev to legacy mode if any node exists on that device. User must explicitly delete nodes before switching mode. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate set netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit Add + set command can be combined: $ devlink port function rate add netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node tx_share 10mbit tx_max 100mbit $ devlink port function rate del netdevsim/netdevsim10/group1 Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae94cd2a1078..7e15853b77fe 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -213,6 +213,7 @@ enum devlink_port_flavour { enum devlink_rate_type { DEVLINK_RATE_TYPE_LEAF, + DEVLINK_RATE_TYPE_NODE, }; enum devlink_param_cmode { @@ -547,6 +548,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TYPE, /* u16 */ DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ + DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From d7555984507822458b32a6405881038241d140be Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:28 +0300 Subject: devlink: Allow setting parent node of rate objects Refactor DEVLINK_CMD_RATE_{GET|SET} command handlers to support setting a node as a parent for another rate object (leaf or node) by means of new attribute DEVLINK_ATTR_RATE_PARENT_NODE_NAME. Extend devlink ops with new callbacks rate_{leaf|node}_parent_set() to set node as a parent for rate object to allow supporting drivers to implement rate grouping through devlink. Driver implementations are allowed to support leafs or node children only. Invoking callback with NULL as parent should be threated by the driver as unset parent action. Extend rate object struct with reference counter to disallow deleting a node with any child pointing to it. User should unset parent for the child explicitly. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate add netdevsim/netdevsim10/group2 $ devlink port function rate set netdevsim/netdevsim10/group1 parent group2 $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node parent group2 $ devlink port function rate set netdevsim/netdevsim10/group1 noparent Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7e15853b77fe..32f53a0069d6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -549,6 +549,7 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From b48c24c2d710cf34810c555dcef883a3d35a9c08 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 2 Jun 2021 15:51:31 -0500 Subject: RDMA/irdma: Implement device supported verb APIs Implement device supported verb APIs. The supported APIs vary based on the underlying transport the ibdev is registered as (i.e. iWARP or RoCEv2). Link: https://lore.kernel.org/r/20210602205138.889-10-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 22483799cd07..3072e5d6b692 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -240,6 +240,7 @@ enum rdma_driver_id { RDMA_DRIVER_OCRDMA, RDMA_DRIVER_NES, RDMA_DRIVER_I40IW, + RDMA_DRIVER_IRDMA = RDMA_DRIVER_I40IW, RDMA_DRIVER_VMW_PVRDMA, RDMA_DRIVER_QEDR, RDMA_DRIVER_HNS, -- cgit v1.2.3 From 48d6b3336a9fc570b48126c4409abf94dd5c5e8a Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 2 Jun 2021 15:51:36 -0500 Subject: RDMA/irdma: Add ABI definitions Add ABI definitions for irdma. Link: https://lore.kernel.org/r/20210602205138.889-15-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/irdma-abi.h | 111 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 include/uapi/rdma/irdma-abi.h (limited to 'include/uapi') diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h new file mode 100644 index 000000000000..26b638a7ad97 --- /dev/null +++ b/include/uapi/rdma/irdma-abi.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2006 - 2021 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + */ + +#ifndef IRDMA_ABI_H +#define IRDMA_ABI_H + +#include + +/* irdma must support legacy GEN_1 i40iw kernel + * and user-space whose last ABI ver is 5 + */ +#define IRDMA_ABI_VER 5 + +enum irdma_memreg_type { + IRDMA_MEMREG_TYPE_MEM = 0, + IRDMA_MEMREG_TYPE_QP = 1, + IRDMA_MEMREG_TYPE_CQ = 2, +}; + +struct irdma_alloc_ucontext_req { + __u32 rsvd32; + __u8 userspace_ver; + __u8 rsvd8[3]; +}; + +struct irdma_alloc_ucontext_resp { + __u32 max_pds; + __u32 max_qps; + __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ + __u8 kernel_ver; + __u8 rsvd[3]; + __aligned_u64 feature_flags; + __aligned_u64 db_mmap_key; + __u32 max_hw_wq_frags; + __u32 max_hw_read_sges; + __u32 max_hw_inline; + __u32 max_hw_rq_quanta; + __u32 max_hw_wq_quanta; + __u32 min_hw_cq_size; + __u32 max_hw_cq_size; + __u16 max_hw_sq_chunk; + __u8 hw_rev; + __u8 rsvd2; +}; + +struct irdma_alloc_pd_resp { + __u32 pd_id; + __u8 rsvd[4]; +}; + +struct irdma_resize_cq_req { + __aligned_u64 user_cq_buffer; +}; + +struct irdma_create_cq_req { + __aligned_u64 user_cq_buf; + __aligned_u64 user_shadow_area; +}; + +struct irdma_create_qp_req { + __aligned_u64 user_wqe_bufs; + __aligned_u64 user_compl_ctx; +}; + +struct irdma_mem_reg_req { + __u16 reg_type; /* enum irdma_memreg_type */ + __u16 cq_pages; + __u16 rq_pages; + __u16 sq_pages; +}; + +struct irdma_modify_qp_req { + __u8 sq_flush; + __u8 rq_flush; + __u8 rsvd[6]; +}; + +struct irdma_create_cq_resp { + __u32 cq_id; + __u32 cq_size; +}; + +struct irdma_create_qp_resp { + __u32 qp_id; + __u32 actual_sq_size; + __u32 actual_rq_size; + __u32 irdma_drv_opt; + __u16 push_idx; + __u8 lsmm; + __u8 rsvd; + __u32 qp_caps; +}; + +struct irdma_modify_qp_resp { + __aligned_u64 push_wqe_mmap_key; + __aligned_u64 push_db_mmap_key; + __u16 push_offset; + __u8 push_valid; + __u8 rsvd[5]; +}; + +struct irdma_create_ah_resp { + __u32 ah_id; + __u8 rsvd[4]; +}; +#endif /* IRDMA_ABI_H */ -- cgit v1.2.3 From fa0cf568fd76550c1ddb806c03a65a1a4a1ea909 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 2 Jun 2021 15:51:37 -0500 Subject: RDMA/irdma: Add irdma Kconfig/Makefile and remove i40iw Add Kconfig and Makefile to build irdma driver. Remove i40iw driver and add an alias in irdma. Remove legacy exported symbols i40e_register_client and i40e_unregister_client from i40e as they are no longer used. irdma is the replacement driver that supports X722. Link: https://lore.kernel.org/r/20210602205138.889-16-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/i40iw-abi.h | 107 ------------------------------------------ 1 file changed, 107 deletions(-) delete mode 100644 include/uapi/rdma/i40iw-abi.h (limited to 'include/uapi') diff --git a/include/uapi/rdma/i40iw-abi.h b/include/uapi/rdma/i40iw-abi.h deleted file mode 100644 index 79890baa6fdb..000000000000 --- a/include/uapi/rdma/i40iw-abi.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2006 - 2016 Intel Corporation. All rights reserved. - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef I40IW_ABI_H -#define I40IW_ABI_H - -#include - -#define I40IW_ABI_VER 5 - -struct i40iw_alloc_ucontext_req { - __u32 reserved32; - __u8 userspace_ver; - __u8 reserved8[3]; -}; - -struct i40iw_alloc_ucontext_resp { - __u32 max_pds; /* maximum pds allowed for this user process */ - __u32 max_qps; /* maximum qps allowed for this user process */ - __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */ - __u8 kernel_ver; - __u8 reserved[3]; -}; - -struct i40iw_alloc_pd_resp { - __u32 pd_id; - __u8 reserved[4]; -}; - -struct i40iw_create_cq_req { - __aligned_u64 user_cq_buffer; - __aligned_u64 user_shadow_area; -}; - -struct i40iw_create_qp_req { - __aligned_u64 user_wqe_buffers; - __aligned_u64 user_compl_ctx; - - /* UDA QP PHB */ - __aligned_u64 user_sq_phb; /* place for VA of the sq phb buff */ - __aligned_u64 user_rq_phb; /* place for VA of the rq phb buff */ -}; - -enum i40iw_memreg_type { - IW_MEMREG_TYPE_MEM = 0x0000, - IW_MEMREG_TYPE_QP = 0x0001, - IW_MEMREG_TYPE_CQ = 0x0002, -}; - -struct i40iw_mem_reg_req { - __u16 reg_type; /* Memory, QP or CQ */ - __u16 cq_pages; - __u16 rq_pages; - __u16 sq_pages; -}; - -struct i40iw_create_cq_resp { - __u32 cq_id; - __u32 cq_size; - __u32 mmap_db_index; - __u32 reserved; -}; - -struct i40iw_create_qp_resp { - __u32 qp_id; - __u32 actual_sq_size; - __u32 actual_rq_size; - __u32 i40iw_drv_opt; - __u16 push_idx; - __u8 lsmm; - __u8 rsvd2; -}; - -#endif -- cgit v1.2.3 From a83d958504734f78f42b1e3392d93816297e790a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 3 Jun 2021 21:20:26 +0200 Subject: Bluetooth: Fix VIRTIO_ID_BT assigned number It turned out that the VIRTIO_ID_* are not assigned in the virtio_ids.h file in the upstream kernel. Picking the next free one was wrong and there is a process that has been followed now. See https://github.com/oasis-tcs/virtio-spec/issues/108 for details. Fixes: afd2daa26c7a ("Bluetooth: Add support for virtio transport driver") Signed-off-by: Marcel Holtmann Signed-off-by: Luiz Augusto von Dentz --- include/uapi/linux/virtio_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index f0c35ce8628c..4fe842c3a3a9 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -54,7 +54,7 @@ #define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_BT 28 /* virtio bluetooth */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From e32ea44c7ae476f4c90e35ab0a29dc8ff082bc11 Mon Sep 17 00:00:00 2001 From: Andreas Roeseler Date: Thu, 3 Jun 2021 16:22:11 -0500 Subject: icmp: fix lib conflict with trinity Including and in the dependencies breaks compilation of trinity due to multiple definitions. is only used in to provide the definition of the struct in_addr, but this can be substituted out by using the datatype __be32. Signed-off-by: Andreas Roeseler Signed-off-by: David S. Miller --- include/uapi/linux/icmp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index c1da8244c5e1..163c0998aec9 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -154,7 +153,7 @@ struct icmp_ext_echo_iio { struct { struct icmp_ext_echo_ctype3_hdr ctype3_hdr; union { - struct in_addr ipv4_addr; + __be32 ipv4_addr; struct in6_addr ipv6_addr; } ip_addr; } addr; -- cgit v1.2.3 From 819fbd3d8ef36c09576c2a0ffea503f5c46e9177 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Jun 2021 11:31:30 +0200 Subject: media: dvb header files: move some headers to staging The audio, video and OSD APIs are used upstream only by the av7110 driver, which was moved to staging. So, move the corresponding header files to it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/audio.h | 101 ------------------- include/uapi/linux/dvb/osd.h | 181 --------------------------------- include/uapi/linux/dvb/video.h | 220 ----------------------------------------- 3 files changed, 502 deletions(-) delete mode 100644 include/uapi/linux/dvb/audio.h delete mode 100644 include/uapi/linux/dvb/osd.h delete mode 100644 include/uapi/linux/dvb/video.h (limited to 'include/uapi') diff --git a/include/uapi/linux/dvb/audio.h b/include/uapi/linux/dvb/audio.h deleted file mode 100644 index 2f869da69171..000000000000 --- a/include/uapi/linux/dvb/audio.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ -/* - * audio.h - DEPRECATED MPEG-TS audio decoder API - * - * NOTE: should not be used on future drivers - * - * Copyright (C) 2000 Ralph Metzler - * & Marcus Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Lesser Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ - -#ifndef _DVBAUDIO_H_ -#define _DVBAUDIO_H_ - -#include - -typedef enum { - AUDIO_SOURCE_DEMUX, /* Select the demux as the main source */ - AUDIO_SOURCE_MEMORY /* Select internal memory as the main source */ -} audio_stream_source_t; - - -typedef enum { - AUDIO_STOPPED, /* Device is stopped */ - AUDIO_PLAYING, /* Device is currently playing */ - AUDIO_PAUSED /* Device is paused */ -} audio_play_state_t; - - -typedef enum { - AUDIO_STEREO, - AUDIO_MONO_LEFT, - AUDIO_MONO_RIGHT, - AUDIO_MONO, - AUDIO_STEREO_SWAPPED -} audio_channel_select_t; - - -typedef struct audio_mixer { - unsigned int volume_left; - unsigned int volume_right; - /* what else do we need? bass, pass-through, ... */ -} audio_mixer_t; - - -typedef struct audio_status { - int AV_sync_state; /* sync audio and video? */ - int mute_state; /* audio is muted */ - audio_play_state_t play_state; /* current playback state */ - audio_stream_source_t stream_source; /* current stream source */ - audio_channel_select_t channel_select; /* currently selected channel */ - int bypass_mode; /* pass on audio data to */ - audio_mixer_t mixer_state; /* current mixer state */ -} audio_status_t; /* separate decoder hardware */ - - -/* for GET_CAPABILITIES and SET_FORMAT, the latter should only set one bit */ -#define AUDIO_CAP_DTS 1 -#define AUDIO_CAP_LPCM 2 -#define AUDIO_CAP_MP1 4 -#define AUDIO_CAP_MP2 8 -#define AUDIO_CAP_MP3 16 -#define AUDIO_CAP_AAC 32 -#define AUDIO_CAP_OGG 64 -#define AUDIO_CAP_SDDS 128 -#define AUDIO_CAP_AC3 256 - -#define AUDIO_STOP _IO('o', 1) -#define AUDIO_PLAY _IO('o', 2) -#define AUDIO_PAUSE _IO('o', 3) -#define AUDIO_CONTINUE _IO('o', 4) -#define AUDIO_SELECT_SOURCE _IO('o', 5) -#define AUDIO_SET_MUTE _IO('o', 6) -#define AUDIO_SET_AV_SYNC _IO('o', 7) -#define AUDIO_SET_BYPASS_MODE _IO('o', 8) -#define AUDIO_CHANNEL_SELECT _IO('o', 9) -#define AUDIO_GET_STATUS _IOR('o', 10, audio_status_t) - -#define AUDIO_GET_CAPABILITIES _IOR('o', 11, unsigned int) -#define AUDIO_CLEAR_BUFFER _IO('o', 12) -#define AUDIO_SET_ID _IO('o', 13) -#define AUDIO_SET_MIXER _IOW('o', 14, audio_mixer_t) -#define AUDIO_SET_STREAMTYPE _IO('o', 15) -#define AUDIO_BILINGUAL_CHANNEL_SELECT _IO('o', 20) - -#endif /* _DVBAUDIO_H_ */ diff --git a/include/uapi/linux/dvb/osd.h b/include/uapi/linux/dvb/osd.h deleted file mode 100644 index 858997c74043..000000000000 --- a/include/uapi/linux/dvb/osd.h +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ -/* - * osd.h - DEPRECATED On Screen Display API - * - * NOTE: should not be used on future drivers - * - * Copyright (C) 2001 Ralph Metzler - * & Marcus Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Lesser Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ - -#ifndef _DVBOSD_H_ -#define _DVBOSD_H_ - -#include - -typedef enum { - /* All functions return -2 on "not open" */ - OSD_Close = 1, /* () */ - /* - * Disables OSD and releases the buffers - * returns 0 on success - */ - OSD_Open, /* (x0,y0,x1,y1,BitPerPixel[2/4/8](color&0x0F),mix[0..15](color&0xF0)) */ - /* - * Opens OSD with this size and bit depth - * returns 0 on success, -1 on DRAM allocation error, -2 on "already open" - */ - OSD_Show, /* () */ - /* - * enables OSD mode - * returns 0 on success - */ - OSD_Hide, /* () */ - /* - * disables OSD mode - * returns 0 on success - */ - OSD_Clear, /* () */ - /* - * Sets all pixel to color 0 - * returns 0 on success - */ - OSD_Fill, /* (color) */ - /* - * Sets all pixel to color - * returns 0 on success - */ - OSD_SetColor, /* (color,R{x0},G{y0},B{x1},opacity{y1}) */ - /* - * set palette entry to , and apply - * R,G,B: 0..255 - * R=Red, G=Green, B=Blue - * opacity=0: pixel opacity 0% (only video pixel shows) - * opacity=1..254: pixel opacity as specified in header - * opacity=255: pixel opacity 100% (only OSD pixel shows) - * returns 0 on success, -1 on error - */ - OSD_SetPalette, /* (firstcolor{color},lastcolor{x0},data) */ - /* - * Set a number of entries in the palette - * sets the entries "firstcolor" through "lastcolor" from the array "data" - * data has 4 byte for each color: - * R,G,B, and a opacity value: 0->transparent, 1..254->mix, 255->pixel - */ - OSD_SetTrans, /* (transparency{color}) */ - /* - * Sets transparency of mixed pixel (0..15) - * returns 0 on success - */ - OSD_SetPixel, /* (x0,y0,color) */ - /* - * sets pixel , to color number - * returns 0 on success, -1 on error - */ - OSD_GetPixel, /* (x0,y0) */ - /* returns color number of pixel ,, or -1 */ - OSD_SetRow, /* (x0,y0,x1,data) */ - /* - * fills pixels x0,y through x1,y with the content of data[] - * returns 0 on success, -1 on clipping all pixel (no pixel drawn) - */ - OSD_SetBlock, /* (x0,y0,x1,y1,increment{color},data) */ - /* - * fills pixels x0,y0 through x1,y1 with the content of data[] - * inc contains the width of one line in the data block, - * inc<=0 uses blockwidth as linewidth - * returns 0 on success, -1 on clipping all pixel - */ - OSD_FillRow, /* (x0,y0,x1,color) */ - /* - * fills pixels x0,y through x1,y with the color - * returns 0 on success, -1 on clipping all pixel - */ - OSD_FillBlock, /* (x0,y0,x1,y1,color) */ - /* - * fills pixels x0,y0 through x1,y1 with the color - * returns 0 on success, -1 on clipping all pixel - */ - OSD_Line, /* (x0,y0,x1,y1,color) */ - /* - * draw a line from x0,y0 to x1,y1 with the color - * returns 0 on success - */ - OSD_Query, /* (x0,y0,x1,y1,xasp{color}}), yasp=11 */ - /* - * fills parameters with the picture dimensions and the pixel aspect ratio - * returns 0 on success - */ - OSD_Test, /* () */ - /* - * draws a test picture. for debugging purposes only - * returns 0 on success - * TODO: remove "test" in final version - */ - OSD_Text, /* (x0,y0,size,color,text) */ - OSD_SetWindow, /* (x0) set window with number 0 - * & Ralph Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ - -#ifndef _UAPI_DVBVIDEO_H_ -#define _UAPI_DVBVIDEO_H_ - -#include -#ifndef __KERNEL__ -#include -#endif - -typedef enum { - VIDEO_FORMAT_4_3, /* Select 4:3 format */ - VIDEO_FORMAT_16_9, /* Select 16:9 format. */ - VIDEO_FORMAT_221_1 /* 2.21:1 */ -} video_format_t; - - -typedef enum { - VIDEO_PAN_SCAN, /* use pan and scan format */ - VIDEO_LETTER_BOX, /* use letterbox format */ - VIDEO_CENTER_CUT_OUT /* use center cut out format */ -} video_displayformat_t; - -typedef struct { - int w; - int h; - video_format_t aspect_ratio; -} video_size_t; - -typedef enum { - VIDEO_SOURCE_DEMUX, /* Select the demux as the main source */ - VIDEO_SOURCE_MEMORY /* If this source is selected, the stream - comes from the user through the write - system call */ -} video_stream_source_t; - - -typedef enum { - VIDEO_STOPPED, /* Video is stopped */ - VIDEO_PLAYING, /* Video is currently playing */ - VIDEO_FREEZED /* Video is freezed */ -} video_play_state_t; - - -/* Decoder commands */ -#define VIDEO_CMD_PLAY (0) -#define VIDEO_CMD_STOP (1) -#define VIDEO_CMD_FREEZE (2) -#define VIDEO_CMD_CONTINUE (3) - -/* Flags for VIDEO_CMD_FREEZE */ -#define VIDEO_CMD_FREEZE_TO_BLACK (1 << 0) - -/* Flags for VIDEO_CMD_STOP */ -#define VIDEO_CMD_STOP_TO_BLACK (1 << 0) -#define VIDEO_CMD_STOP_IMMEDIATELY (1 << 1) - -/* Play input formats: */ -/* The decoder has no special format requirements */ -#define VIDEO_PLAY_FMT_NONE (0) -/* The decoder requires full GOPs */ -#define VIDEO_PLAY_FMT_GOP (1) - -/* The structure must be zeroed before use by the application - This ensures it can be extended safely in the future. */ -struct video_command { - __u32 cmd; - __u32 flags; - union { - struct { - __u64 pts; - } stop; - - struct { - /* 0 or 1000 specifies normal speed, - 1 specifies forward single stepping, - -1 specifies backward single stepping, - >1: playback at speed/1000 of the normal speed, - <-1: reverse playback at (-speed/1000) of the normal speed. */ - __s32 speed; - __u32 format; - } play; - - struct { - __u32 data[16]; - } raw; - }; -}; - -/* FIELD_UNKNOWN can be used if the hardware does not know whether - the Vsync is for an odd, even or progressive (i.e. non-interlaced) - field. */ -#define VIDEO_VSYNC_FIELD_UNKNOWN (0) -#define VIDEO_VSYNC_FIELD_ODD (1) -#define VIDEO_VSYNC_FIELD_EVEN (2) -#define VIDEO_VSYNC_FIELD_PROGRESSIVE (3) - -struct video_event { - __s32 type; -#define VIDEO_EVENT_SIZE_CHANGED 1 -#define VIDEO_EVENT_FRAME_RATE_CHANGED 2 -#define VIDEO_EVENT_DECODER_STOPPED 3 -#define VIDEO_EVENT_VSYNC 4 - /* unused, make sure to use atomic time for y2038 if it ever gets used */ - long timestamp; - union { - video_size_t size; - unsigned int frame_rate; /* in frames per 1000sec */ - unsigned char vsync_field; /* unknown/odd/even/progressive */ - } u; -}; - - -struct video_status { - int video_blank; /* blank video on freeze? */ - video_play_state_t play_state; /* current state of playback */ - video_stream_source_t stream_source; /* current source (demux/memory) */ - video_format_t video_format; /* current aspect ratio of stream*/ - video_displayformat_t display_format;/* selected cropping mode */ -}; - - -struct video_still_picture { - char __user *iFrame; /* pointer to a single iframe in memory */ - __s32 size; -}; - - -typedef __u16 video_attributes_t; -/* bits: descr. */ -/* 15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */ -/* 13-12 TV system (0=525/60, 1=625/50) */ -/* 11-10 Aspect ratio (0=4:3, 3=16:9) */ -/* 9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */ -/* 7 line 21-1 data present in GOP (1=yes, 0=no) */ -/* 6 line 21-2 data present in GOP (1=yes, 0=no) */ -/* 5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */ -/* 2 source letterboxed (1=yes, 0=no) */ -/* 0 film/camera mode (0= - *camera, 1=film (625/50 only)) */ - - -/* bit definitions for capabilities: */ -/* can the hardware decode MPEG1 and/or MPEG2? */ -#define VIDEO_CAP_MPEG1 1 -#define VIDEO_CAP_MPEG2 2 -/* can you send a system and/or program stream to video device? - (you still have to open the video and the audio device but only - send the stream to the video device) */ -#define VIDEO_CAP_SYS 4 -#define VIDEO_CAP_PROG 8 -/* can the driver also handle SPU, NAVI and CSS encoded data? - (CSS API is not present yet) */ -#define VIDEO_CAP_SPU 16 -#define VIDEO_CAP_NAVI 32 -#define VIDEO_CAP_CSS 64 - - -#define VIDEO_STOP _IO('o', 21) -#define VIDEO_PLAY _IO('o', 22) -#define VIDEO_FREEZE _IO('o', 23) -#define VIDEO_CONTINUE _IO('o', 24) -#define VIDEO_SELECT_SOURCE _IO('o', 25) -#define VIDEO_SET_BLANK _IO('o', 26) -#define VIDEO_GET_STATUS _IOR('o', 27, struct video_status) -#define VIDEO_GET_EVENT _IOR('o', 28, struct video_event) -#define VIDEO_SET_DISPLAY_FORMAT _IO('o', 29) -#define VIDEO_STILLPICTURE _IOW('o', 30, struct video_still_picture) -#define VIDEO_FAST_FORWARD _IO('o', 31) -#define VIDEO_SLOWMOTION _IO('o', 32) -#define VIDEO_GET_CAPABILITIES _IOR('o', 33, unsigned int) -#define VIDEO_CLEAR_BUFFER _IO('o', 34) -#define VIDEO_SET_STREAMTYPE _IO('o', 36) -#define VIDEO_SET_FORMAT _IO('o', 37) -#define VIDEO_GET_SIZE _IOR('o', 55, video_size_t) - -/** - * VIDEO_GET_PTS - * - * Read the 33 bit presentation time stamp as defined - * in ITU T-REC-H.222.0 / ISO/IEC 13818-1. - * - * The PTS should belong to the currently played - * frame if possible, but may also be a value close to it - * like the PTS of the last decoded frame or the last PTS - * extracted by the PES parser. - */ -#define VIDEO_GET_PTS _IOR('o', 57, __u64) - -/* Read the number of displayed frames since the decoder was started */ -#define VIDEO_GET_FRAME_COUNT _IOR('o', 58, __u64) - -#define VIDEO_COMMAND _IOWR('o', 59, struct video_command) -#define VIDEO_TRY_COMMAND _IOWR('o', 60, struct video_command) - -#endif /* _UAPI_DVBVIDEO_H_ */ -- cgit v1.2.3 From 603e4922f1c81fc2ed3a87b4f91a8d3aafc7e093 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 May 2021 10:25:26 +0300 Subject: remove the raw driver The raw driver used to provide direct unbuffered access to block devices before O_DIRECT was invented. It has been obsolete for more than a decade. Acked-by: Greg Kroah-Hartman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/Pine.LNX.4.64.0703180754060.6605@CPE00045a9c397f-CM001225dbafb6/ Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210531072526.97052-1-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/raw.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/uapi/linux/raw.h (limited to 'include/uapi') diff --git a/include/uapi/linux/raw.h b/include/uapi/linux/raw.h deleted file mode 100644 index 47874919d0b9..000000000000 --- a/include/uapi/linux/raw.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_RAW_H -#define __LINUX_RAW_H - -#include - -#define RAW_SETBIND _IO( 0xac, 0 ) -#define RAW_GETBIND _IO( 0xac, 1 ) - -struct raw_config_request -{ - int raw_minor; - __u64 block_major; - __u64 block_minor; -}; - -#endif /* __LINUX_RAW_H */ -- cgit v1.2.3 From 3e2926f8753dac1ded56c8ef3e91f56ee763dafd Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 4 Jun 2021 09:00:11 -0400 Subject: drm/panfrost: Add AFBC_FEATURES parameter The value of the AFBC_FEATURES register is required by userspace to determine AFBC support on Bifrost. A user on our IRC channel (#panfrost) reported a workload that raised a fault on one system's Mali G31 but worked flawlessly with another system's Mali G31. We determined the cause to be missing AFBC support on one vendor's Mali implementation -- it turns out AFBC is optional on Bifrost! Whether AFBC is supported or not is exposed in the AFBC_FEATURES register on Bifrost, which reads back as 0 on Midgard. A zero value indicates AFBC is fully supported, provided the architecture itself supports AFBC, allowing backwards-compatibility with Midgard. Bits 0 and 15 indicate that AFBC support is absent for texturing and rendering respectively. The user experiencing the fault reports that AFBC_FEATURES reads back 0x10001 on their system, confirming the architectural lack of AFBC. Userspace needs this parameter to know to disable AFBC on that chip, and perhaps others. v2: Fix typo from copy-paste fail. v3: Bump the UABI version. This commit was cherry-picked from another series so chalking this up to a rebase fail. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Steven Price Cc: Rob Herring Cc: Tomeu Vizoso Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20210604130011.3203-1-alyssa.rosenzweig@collabora.com --- include/uapi/drm/panfrost_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index ec19db1eead8..061e700dd06c 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -171,6 +171,7 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_JS_FEATURES15, DRM_PANFROST_PARAM_NR_CORE_GROUPS, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, + DRM_PANFROST_PARAM_AFBC_FEATURES, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 90a187d26f526f389525ce98c625c0d88eafe430 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Fri, 19 Jun 2020 12:26:40 +0800 Subject: drm/amdgpu: add uapi to define yellow carp series Add a flag to define yellow carp series. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 91a23187902e..0cbd1540aeac 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1134,6 +1134,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_RV 142 /* Raven */ #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ #if defined(__cplusplus) } -- cgit v1.2.3 From 64c2c2c62f92339b176ea24403d8db16db36f9e6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 May 2021 16:07:48 +0200 Subject: quota: Change quotactl_path() systcall to an fd-based one Some users have pointed out that path-based syscalls are problematic in some environments and at least directory fd argument and possibly also resolve flags are desirable for such syscalls. Rather than reimplementing all details of pathname lookup and following where it may eventually evolve, let's go for full file descriptor based syscall similar to how ioctl(2) works since the beginning. Managing of quotas isn't performance sensitive so the extra overhead of open does not matter and we are able to consume O_PATH descriptors as well which makes open cheap anyway. Also for frequent operations (such as retrieving usage information for all users) we can reuse single fd and in fact get even better performance as well as avoiding races with possible remounts etc. Tested-by: Sascha Hauer Acked-by: Christian Brauner Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/uapi/asm-generic/unistd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 6de5a7fc066b..f211961ce1da 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -863,8 +863,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) -- cgit v1.2.3 From e2cf17d3774c323ef6dab6e9f7c0cfc5e742afd9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Jun 2021 12:27:07 +0200 Subject: netfilter: add new hook nfnl subsystem This nfnl subsystem allows to dump the list of all active netfiler hooks, e.g. defrag, conntrack, nf/ip/arp/ip6tables and so on. This helps to see what kind of features are currently enabled in the network stack. Sample output from nft tool using this infra: $ nft list hook ip input family ip hook input { +0000000010 nft_do_chain_inet [nf_tables] # nft table firewalld INPUT +0000000100 nf_nat_ipv4_local_in [nf_nat] +2147483647 ipv4_confirm [nf_conntrack] } Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink.h | 3 +- include/uapi/linux/netfilter/nfnetlink_hook.h | 55 +++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/netfilter/nfnetlink_hook.h (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 5bc960f220b3..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -60,7 +60,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTHELPER 9 #define NFNL_SUBSYS_NFTABLES 10 #define NFNL_SUBSYS_NFT_COMPAT 11 -#define NFNL_SUBSYS_COUNT 12 +#define NFNL_SUBSYS_HOOK 12 +#define NFNL_SUBSYS_COUNT 13 /* Reserved control nfnetlink messages */ #define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h new file mode 100644 index 000000000000..912ec60b26b0 --- /dev/null +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _NFNL_HOOK_H_ +#define _NFNL_HOOK_H_ + +enum nfnl_hook_msg_types { + NFNL_MSG_HOOK_GET, + NFNL_MSG_HOOK_MAX, +}; + +/** + * enum nfnl_hook_attributes - netfilter hook netlink attributes + * + * @NFNLA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFNLA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFNLA_HOOK_DEV: netdevice name (NLA_STRING) + * @NFNLA_HOOK_FUNCTION_NAME: hook function name (NLA_STRING) + * @NFNLA_HOOK_MODULE_NAME: kernel module that registered this hook (NLA_STRING) + * @NFNLA_HOOK_CHAIN_INFO: basechain hook metadata (NLA_NESTED) + */ +enum nfnl_hook_attributes { + NFNLA_HOOK_UNSPEC, + NFNLA_HOOK_HOOKNUM, + NFNLA_HOOK_PRIORITY, + NFNLA_HOOK_DEV, + NFNLA_HOOK_FUNCTION_NAME, + NFNLA_HOOK_MODULE_NAME, + NFNLA_HOOK_CHAIN_INFO, + __NFNLA_HOOK_MAX +}; +#define NFNLA_HOOK_MAX (__NFNLA_HOOK_MAX - 1) + +/** + * enum nfnl_hook_chain_info_attributes - chain description + * + * NFNLA_HOOK_INFO_DESC: nft chain and table name (enum nft_table_attributes) (NLA_NESTED) + * NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + */ +enum nfnl_hook_chain_info_attributes { + NFNLA_HOOK_INFO_UNSPEC, + NFNLA_HOOK_INFO_DESC, + NFNLA_HOOK_INFO_TYPE, + __NFNLA_HOOK_INFO_MAX, +}; +#define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) + +/** + * enum nfnl_hook_chaintype - chain type + * + * @NFNL_HOOK_TYPE_NFTABLES nf_tables base chain + */ +enum nfnl_hook_chaintype { + NFNL_HOOK_TYPE_NFTABLES = 0x1, +}; + +#endif /* _NFNL_HOOK_H */ -- cgit v1.2.3 From d409989b59ad0b8d108706db25e17c320a9664eb Mon Sep 17 00:00:00 2001 From: Chen Li Date: Mon, 7 Jun 2021 09:44:35 +0800 Subject: netlink: simplify NLMSG_DATA with NLMSG_HDRLEN The NLMSG_LENGTH(0) may confuse the API users, NLMSG_HDRLEN is much more clear. Besides, some code style problems are also fixed. Signed-off-by: Chen Li Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 3d94269bbfa8..4c0cde075c27 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -91,9 +91,10 @@ struct nlmsghdr { #define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) #define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) -#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_DATA(nlh) ((void *)(((char *)nlh) + NLMSG_HDRLEN)) #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ - (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) + (struct nlmsghdr *)(((char *)(nlh)) + \ + NLMSG_ALIGN((nlh)->nlmsg_len))) #define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ (nlh)->nlmsg_len <= (len)) -- cgit v1.2.3 From 992da01aa932b432ef8dc3885fa76415b5dbe43f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 10 Jun 2021 16:37:37 +0100 Subject: io_uring: change registration/upd/rsrc tagging ABI There are ABI moments about recently added rsrc registration/update and tagging that might become a nuisance in the future. First, IORING_REGISTER_RSRC[_UPD] hide different types of resources under it, so breaks fine control over them by restrictions. It works for now, but once those are wanted under restrictions it would require a rework. It was also inconvenient trying to fit a new resource not supporting all the features (e.g. dynamic update) into the interface, so better to return to IORING_REGISTER_* top level dispatching. Second, register/update were considered to accept a type of resource, however that's not a good idea because there might be several ways of registration of a single resource type, e.g. we may want to add non-contig buffers or anything more exquisite as dma mapped memory. So, remove IORING_RSRC_[FILE,BUFFER] out of the ABI, and place them internally for now to limit changes. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9b554897a7c17ad6e3becc48dfed2f7af9f423d5.1623339162.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e1ae46683301..48b4ddcd56ff 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -298,8 +298,12 @@ enum { IORING_UNREGISTER_PERSONALITY = 10, IORING_REGISTER_RESTRICTIONS = 11, IORING_REGISTER_ENABLE_RINGS = 12, - IORING_REGISTER_RSRC = 13, - IORING_REGISTER_RSRC_UPDATE = 14, + + /* extended with tagging */ + IORING_REGISTER_FILES2 = 13, + IORING_REGISTER_FILES_UPDATE2 = 14, + IORING_REGISTER_BUFFERS2 = 15, + IORING_REGISTER_BUFFERS_UPDATE = 16, /* this goes last */ IORING_REGISTER_LAST @@ -312,14 +316,10 @@ struct io_uring_files_update { __aligned_u64 /* __s32 * */ fds; }; -enum { - IORING_RSRC_FILE = 0, - IORING_RSRC_BUFFER = 1, -}; - struct io_uring_rsrc_register { - __u32 type; __u32 nr; + __u32 resv; + __u64 resv2; __aligned_u64 data; __aligned_u64 tags; }; @@ -335,8 +335,8 @@ struct io_uring_rsrc_update2 { __u32 resv; __aligned_u64 data; __aligned_u64 tags; - __u32 type; __u32 nr; + __u32 resv2; }; /* Skip updating fd indexes set to this value in the fd table */ -- cgit v1.2.3 From 9690557e22d63f13534fd167d293ac8ed8b104f9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 10 Jun 2021 16:37:38 +0100 Subject: io_uring: add feature flag for rsrc tags Add IORING_FEAT_RSRC_TAGS indicating that io_uring supports a bunch of new IORING_REGISTER operations, in particular IORING_REGISTER_[FILES[,UPDATE]2,BUFFERS[2,UPDATE]] that support rsrc tagging, and also indicating implemented dynamic fixed buffer updates. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9b995d4045b6c6b4ab7510ca124fd25ac2203af7.1623339162.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 48b4ddcd56ff..162ff99ed2cb 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -280,6 +280,7 @@ struct io_uring_params { #define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) #define IORING_FEAT_EXT_ARG (1U << 8) #define IORING_FEAT_NATIVE_WORKERS (1U << 9) +#define IORING_FEAT_RSRC_TAGS (1U << 10) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 6ddb5680085a3eefe0c6267e3514060045a13c95 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 9 Jun 2021 10:27:01 +0800 Subject: audit: remove trailing spaces and tabs Run the following command to find and remove the trailing spaces and tabs: sed -r -i 's/[ \t]+$//' The files to be checked are as follows: kernel/audit* include/linux/audit.h include/uapi/linux/audit.h Signed-off-by: Zhen Lei Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index cd2d8279a5e4..daa481729e9b 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -48,7 +48,7 @@ * 2500 - 2999 future user space (maybe integrity labels and related events) * * Messages from 1000-1199 are bi-directional. 1200-1299 & 2100 - 2999 are - * exclusively user space. 1300-2099 is kernel --> user space + * exclusively user space. 1300-2099 is kernel --> user space * communication. */ #define AUDIT_GET 1000 /* Get status */ @@ -78,7 +78,7 @@ #define AUDIT_LAST_USER_MSG 1199 #define AUDIT_FIRST_USER_MSG2 2100 /* More user space messages */ #define AUDIT_LAST_USER_MSG2 2999 - + #define AUDIT_DAEMON_START 1200 /* Daemon startup record */ #define AUDIT_DAEMON_END 1201 /* Daemon normal stop record */ #define AUDIT_DAEMON_ABORT 1202 /* Daemon error stop record */ -- cgit v1.2.3 From f07b2a5b04d4a50d931a0afe4e3e114ce09a2e4b Mon Sep 17 00:00:00 2001 From: Arseny Krasnov Date: Fri, 11 Jun 2021 14:12:22 +0300 Subject: virtio/vsock: defines and constants for SEQPACKET Add set of defines and constants for SOCK_SEQPACKET support in vsock. Signed-off-by: Arseny Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/uapi/linux/virtio_vsock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 1d57ed3d84d2..3dd3555b2740 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include #include +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { __le64 guest_cid; } __attribute__((packed)); @@ -65,6 +68,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -91,4 +95,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOR = 1, +}; + #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From 00e77ed8e64d5f271c1f015c7153545980d48a76 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 12 Jun 2021 10:20:55 +0200 Subject: rtnetlink: add IFLA_PARENT_[DEV|DEV_BUS]_NAME In some cases, for example in the upcoming WWAN framework changes, there's no natural "parent netdev", so sometimes dummy netdevs are created or similar. IFLA_PARENT_DEV_NAME is a new attribute intended to contain a device (sysfs, struct device) name that can be used instead when creating a new netdev, if the rtnetlink family implements it. As suggested by Parav Pandit, we also introduce IFLA_PARENT_DEV_BUS_NAME attribute in order to uniquely identify a device on the system (with bus/name pair). ip-link(8) support for the generic parent device attributes will help us avoid code duplication, so no other link type will require a custom code to handle the parent name attribute. E.g. the WWAN interface creation command will looks like this: $ ip link add wwan0-1 parent-dev wwan0 type wwan channel-id 1 So, some future subsystem (or driver) FOO will have an interface creation command that looks like this: $ ip link add foo1-3 parent-dev foo1 type foo bar-id 3 baz-type Y Below is an example of dumping link info of a random device with these new attributes: $ ip --details link show wlp0s20f3 4: wlp0s20f3: mtu 1500 qdisc noqueue state UP mode DORMANT group default qlen 1000 ... parent_bus pci parent_dev 0000:00:14.3 Co-developed-by: Sergey Ryazanov Signed-off-by: Sergey Ryazanov Co-developed-by: Loic Poulain Signed-off-by: Loic Poulain Suggested-by: Sergey Ryazanov Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a5a7f0e64865..4882e81514b6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -341,6 +341,13 @@ enum { IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + __IFLA_MAX }; -- cgit v1.2.3 From 88b710532e53de2466d1033fb1d5125aabf3215a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 12 Jun 2021 10:20:56 +0200 Subject: wwan: add interface creation support Add support to create (and destroy) interfaces via a new rtnetlink kind "wwan". The responsible driver has to use the new wwan_register_ops() to make this possible. Signed-off-by: Johannes Berg Signed-off-by: Sergey Ryazanov Signed-off-by: Loic Poulain Signed-off-by: David S. Miller --- include/uapi/linux/wwan.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/uapi/linux/wwan.h (limited to 'include/uapi') diff --git a/include/uapi/linux/wwan.h b/include/uapi/linux/wwan.h new file mode 100644 index 000000000000..32a2720b4d11 --- /dev/null +++ b/include/uapi/linux/wwan.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2021 Intel Corporation. + */ +#ifndef _UAPI_WWAN_H_ +#define _UAPI_WWAN_H_ + +enum { + IFLA_WWAN_UNSPEC, + IFLA_WWAN_LINK_ID, /* u32 */ + + __IFLA_WWAN_MAX +}; +#define IFLA_WWAN_MAX (__IFLA_WWAN_MAX - 1) + +#endif /* _UAPI_WWAN_H_ */ -- cgit v1.2.3 From 87815ee9d0060a91bdf18266e42837a9adb5972e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 13 Apr 2021 07:09:07 -0700 Subject: cxl/pci: Add media provisioning required commands Some of the commands have already been defined for the support of RAW commands (to be blocked). Unlike their usage in the RAW interface, when used through the supported interface, they will be coordinated and marshalled along with other commands being issued by userspace and the driver itself. That coordination will be added later. The list of commands was determined based on the learnings from libnvdimm and this list is provided directly from Dan. Recommended-by: Dan Williams Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20210413140907.534404-1-ben.widawsky@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 3155382dfc9b..f6e8a005b113 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -29,6 +29,18 @@ ___C(GET_LSA, "Get Label Storage Area"), \ ___C(GET_HEALTH_INFO, "Get Health Info"), \ ___C(GET_LOG, "Get Log"), \ + ___C(SET_PARTITION_INFO, "Set Partition Information"), \ + ___C(SET_LSA, "Set Label Storage Area"), \ + ___C(GET_ALERT_CONFIG, "Get Alert Configuration"), \ + ___C(SET_ALERT_CONFIG, "Set Alert Configuration"), \ + ___C(GET_SHUTDOWN_STATE, "Get Shutdown State"), \ + ___C(SET_SHUTDOWN_STATE, "Set Shutdown State"), \ + ___C(GET_POISON, "Get Poison List"), \ + ___C(INJECT_POISON, "Inject Poison"), \ + ___C(CLEAR_POISON, "Clear Poison"), \ + ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ + ___C(SCAN_MEDIA, "Scan Media"), \ + ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From 8b1462b67f23da548f27b779a36b8ea75f5ef249 Mon Sep 17 00:00:00 2001 From: Marcin Juszkiewicz Date: Mon, 14 Jun 2021 17:37:12 +0200 Subject: quota: finish disable quotactl_path syscall In commit 5b9fedb31e47 ("quota: Disable quotactl_path syscall") Jan Kara disabled quotactl_path syscall on several architectures. This commit disables it on all architectures using unified list of system calls: - arm64 - arc - csky - h8300 - hexagon - nds32 - nios2 - openrisc - riscv (32/64) CC: Jan Kara CC: Christian Brauner CC: Sascha Hauer Link: https://lore.kernel.org/lkml/20210512153621.n5u43jsytbik4yze@wittgenstein Link: https://lore.kernel.org/r/20210614153712.313707-1-marcin@juszkiewicz.com.pl Fixes: 5b9fedb31e47 ("quota: Disable quotactl_path syscall") Acked-by: Christian Brauner Signed-off-by: Marcin Juszkiewicz Signed-off-by: Jan Kara --- include/uapi/asm-generic/unistd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 6de5a7fc066b..d2a942086fcb 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) -- cgit v1.2.3 From e061047684af63f2d4f1338ec73140f6e29eb59f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:21 +0900 Subject: bpf: Support BPF_FUNC_get_socket_cookie() for BPF_PROG_TYPE_SK_REUSEPORT. We will call sock_reuseport.prog for socket migration in the next commit, so the eBPF program has to know which listener is closing to select a new listener. We can currently get a unique ID of each listener in the userspace by calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map. This patch makes the pointer of sk available in sk_reuseport_md so that we can get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program. Suggested-by: Martin KaFai Lau Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20210612123224.12525-9-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2c1ba70abbf1..f3b72588442b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5416,6 +5416,7 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + __bpf_md_ptr(struct bpf_sock *, sk); }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From d5e4ddaeb6ab2c3c7fbb7b247a6d34bb0b18d87e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:22 +0900 Subject: bpf: Support socket migration by eBPF. This patch introduces a new bpf_attach_type for BPF_PROG_TYPE_SK_REUSEPORT to check if the attached eBPF program is capable of migrating sockets. When the eBPF program is attached, we run it for socket migration if the expected_attach_type is BPF_SK_REUSEPORT_SELECT_OR_MIGRATE or net.ipv4.tcp_migrate_req is enabled. Currently, the expected_attach_type is not enforced for the BPF_PROG_TYPE_SK_REUSEPORT type of program. Thus, this commit follows the earlier idea in the commit aac3fc320d94 ("bpf: Post-hooks for sys_bind") to fix up the zero expected_attach_type in bpf_prog_load_fixup_attach_type(). Moreover, this patch adds a new field (migrating_sk) to sk_reuseport_md to select a new listener based on the child socket. migrating_sk varies depending on if it is migrating a request in the accept queue or during 3WHS. - accept_queue : sock (ESTABLISHED/SYN_RECV) - 3WHS : request_sock (NEW_SYN_RECV) In the eBPF program, we can select a new listener by BPF_FUNC_sk_select_reuseport(). Also, we can cancel migration by returning SK_DROP. This feature is useful when listeners have different settings at the socket API level or when we want to free resources as soon as possible. - SK_PASS with selected_sk, select it as a new listener - SK_PASS with selected_sk NULL, fallbacks to the random selection - SK_DROP, cancel the migration. There is a noteworthy point. We select a listening socket in three places, but we do not have struct skb at closing a listener or retransmitting a SYN+ACK. On the other hand, some helper functions do not expect skb is NULL (e.g. skb_header_pointer() in BPF_FUNC_skb_load_bytes(), skb_tail_pointer() in BPF_FUNC_skb_load_bytes_relative()). So we allocate an empty skb temporarily before running the eBPF program. Suggested-by: Martin KaFai Lau Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/netdev/20201123003828.xjpjdtk4ygl6tg6h@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/netdev/20201203042402.6cskdlit5f3mw4ru@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/netdev/20201209030903.hhow5r53l6fmozjn@kafai-mbp.dhcp.thefacebook.com/ Link: https://lore.kernel.org/bpf/20210612123224.12525-10-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3b72588442b..bf9252c7381e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -994,6 +994,8 @@ enum bpf_attach_type { BPF_SK_LOOKUP, BPF_XDP, BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, __MAX_BPF_ATTACH_TYPE }; @@ -5416,7 +5418,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. + * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. + */ __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From 776c53c6a448905d8b9b161805b67f82301bfe91 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 4 Jun 2021 15:47:52 +0200 Subject: platform/surface: aggregator_cdev: Add support for forwarding events to user-space Currently, debugging unknown events requires writing a custom driver. This is somewhat difficult, slow to adapt, and not entirely user-friendly for quickly trying to figure out things on devices of some third-party user. We can do better. We already have a user-space interface intended for debugging SAM EC requests, so let's add support for receiving events to that. This commit provides support for receiving events by reading from the controller file. It additionally introduces two new IOCTLs to control which event categories will be forwarded. Specifically, a user-space client can specify which target categories it wants to receive events from by registering the corresponding notifier(s) via the IOCTLs and after that, read the received events by reading from the controller device. Signed-off-by: Maximilian Luz Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210604134755.535590-5-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- include/uapi/linux/surface_aggregator/cdev.h | 41 ++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/surface_aggregator/cdev.h b/include/uapi/linux/surface_aggregator/cdev.h index fbcce04abfe9..4f393fafc235 100644 --- a/include/uapi/linux/surface_aggregator/cdev.h +++ b/include/uapi/linux/surface_aggregator/cdev.h @@ -6,7 +6,7 @@ * device. This device provides direct user-space access to the SSAM EC. * Intended for debugging and development. * - * Copyright (C) 2020 Maximilian Luz + * Copyright (C) 2020-2021 Maximilian Luz */ #ifndef _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H @@ -73,6 +73,43 @@ struct ssam_cdev_request { } response; } __attribute__((__packed__)); -#define SSAM_CDEV_REQUEST _IOWR(0xA5, 1, struct ssam_cdev_request) +/** + * struct ssam_cdev_notifier_desc - Notifier descriptor. + * @priority: Priority value determining the order in which notifier + * callbacks will be called. A higher value means higher + * priority, i.e. the associated callback will be executed + * earlier than other (lower priority) callbacks. + * @target_category: The event target category for which this notifier should + * receive events. + * + * Specifies the notifier that should be registered or unregistered, + * specifically with which priority and for which target category of events. + */ +struct ssam_cdev_notifier_desc { + __s32 priority; + __u8 target_category; +} __attribute__((__packed__)); + +/** + * struct ssam_cdev_event - SSAM event sent by the EC. + * @target_category: Target category of the event source. See &enum ssam_ssh_tc. + * @target_id: Target ID of the event source. + * @command_id: Command ID of the event. + * @instance_id: Instance ID of the event source. + * @length: Length of the event payload in bytes. + * @data: Event payload data. + */ +struct ssam_cdev_event { + __u8 target_category; + __u8 target_id; + __u8 command_id; + __u8 instance_id; + __u16 length; + __u8 data[]; +} __attribute__((__packed__)); + +#define SSAM_CDEV_REQUEST _IOWR(0xA5, 1, struct ssam_cdev_request) +#define SSAM_CDEV_NOTIF_REGISTER _IOW(0xA5, 2, struct ssam_cdev_notifier_desc) +#define SSAM_CDEV_NOTIF_UNREGISTER _IOW(0xA5, 3, struct ssam_cdev_notifier_desc) #endif /* _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H */ -- cgit v1.2.3 From e8e298a653856b1f3a2bb7b1fe31d3faa93cc7dc Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 4 Jun 2021 15:47:53 +0200 Subject: platform/surface: aggregator_cdev: Allow enabling of events from user-space While events can already be enabled and disabled via the generic request IOCTL, this bypasses the internal reference counting mechanism of the controller. Due to that, disabling an event will turn it off regardless of any other client having requested said event, which may break functionality of that client. To solve this, add IOCTLs wrapping the ssam_controller_event_enable() and ssam_controller_event_disable() functions, which have been previously introduced for this specific purpose. Signed-off-by: Maximilian Luz Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210604134755.535590-6-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- include/uapi/linux/surface_aggregator/cdev.h | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/surface_aggregator/cdev.h b/include/uapi/linux/surface_aggregator/cdev.h index 4f393fafc235..08f46b60b151 100644 --- a/include/uapi/linux/surface_aggregator/cdev.h +++ b/include/uapi/linux/surface_aggregator/cdev.h @@ -90,6 +90,36 @@ struct ssam_cdev_notifier_desc { __u8 target_category; } __attribute__((__packed__)); +/** + * struct ssam_cdev_event_desc - Event descriptor. + * @reg: Registry via which the event will be enabled/disabled. + * @reg.target_category: Target category for the event registry requests. + * @reg.target_id: Target ID for the event registry requests. + * @reg.cid_enable: Command ID for the event-enable request. + * @reg.cid_disable: Command ID for the event-disable request. + * @id: ID specifying the event. + * @id.target_category: Target category of the event source. + * @id.instance: Instance ID of the event source. + * @flags: Flags used for enabling the event. + * + * Specifies which event should be enabled/disabled and how to do that. + */ +struct ssam_cdev_event_desc { + struct { + __u8 target_category; + __u8 target_id; + __u8 cid_enable; + __u8 cid_disable; + } reg; + + struct { + __u8 target_category; + __u8 instance; + } id; + + __u8 flags; +} __attribute__((__packed__)); + /** * struct ssam_cdev_event - SSAM event sent by the EC. * @target_category: Target category of the event source. See &enum ssam_ssh_tc. @@ -111,5 +141,7 @@ struct ssam_cdev_event { #define SSAM_CDEV_REQUEST _IOWR(0xA5, 1, struct ssam_cdev_request) #define SSAM_CDEV_NOTIF_REGISTER _IOW(0xA5, 2, struct ssam_cdev_notifier_desc) #define SSAM_CDEV_NOTIF_UNREGISTER _IOW(0xA5, 3, struct ssam_cdev_notifier_desc) +#define SSAM_CDEV_EVENT_ENABLE _IOW(0xA5, 4, struct ssam_cdev_event_desc) +#define SSAM_CDEV_EVENT_DISABLE _IOW(0xA5, 5, struct ssam_cdev_event_desc) #endif /* _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H */ -- cgit v1.2.3 From 8c40602b4be17571dfd75102f4f1e690311c5210 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 16 Jun 2021 16:52:56 +0200 Subject: net/smc: Add netlink support for SMC statistics Add the netlink function which collects the statistics information and delivers it to the userspace. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 3e68da07fba2..f32f11b30963 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -47,6 +47,7 @@ enum { SMC_NETLINK_GET_LGR_SMCD, SMC_NETLINK_GET_DEV_SMCD, SMC_NETLINK_GET_DEV_SMCR, + SMC_NETLINK_GET_STATS, }; /* SMC_GENL_FAMILY top level attributes */ @@ -58,6 +59,7 @@ enum { SMC_GEN_LGR_SMCD, /* nest */ SMC_GEN_DEV_SMCD, /* nest */ SMC_GEN_DEV_SMCR, /* nest */ + SMC_GEN_STATS, /* nest */ __SMC_GEN_MAX, SMC_GEN_MAX = __SMC_GEN_MAX - 1 }; @@ -159,4 +161,71 @@ enum { SMC_NLA_DEV_MAX = __SMC_NLA_DEV_MAX - 1 }; +/* SMC_NLA_STATS_T_TX(RX)_RMB_SIZE nested attributes */ +/* SMC_NLA_STATS_TX(RX)PLOAD_SIZE nested attributes */ +enum { + SMC_NLA_STATS_PLOAD_PAD, + SMC_NLA_STATS_PLOAD_8K, /* u64 */ + SMC_NLA_STATS_PLOAD_16K, /* u64 */ + SMC_NLA_STATS_PLOAD_32K, /* u64 */ + SMC_NLA_STATS_PLOAD_64K, /* u64 */ + SMC_NLA_STATS_PLOAD_128K, /* u64 */ + SMC_NLA_STATS_PLOAD_256K, /* u64 */ + SMC_NLA_STATS_PLOAD_512K, /* u64 */ + SMC_NLA_STATS_PLOAD_1024K, /* u64 */ + SMC_NLA_STATS_PLOAD_G_1024K, /* u64 */ + __SMC_NLA_STATS_PLOAD_MAX, + SMC_NLA_STATS_PLOAD_MAX = __SMC_NLA_STATS_PLOAD_MAX - 1 +}; + +/* SMC_NLA_STATS_T_TX(RX)_RMB_STATS nested attributes */ +enum { + SMC_NLA_STATS_RMB_PAD, + SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT, /* u64 */ + SMC_NLA_STATS_RMB_SIZE_SM_CNT, /* u64 */ + SMC_NLA_STATS_RMB_FULL_PEER_CNT, /* u64 */ + SMC_NLA_STATS_RMB_FULL_CNT, /* u64 */ + SMC_NLA_STATS_RMB_REUSE_CNT, /* u64 */ + SMC_NLA_STATS_RMB_ALLOC_CNT, /* u64 */ + SMC_NLA_STATS_RMB_DGRADE_CNT, /* u64 */ + __SMC_NLA_STATS_RMB_MAX, + SMC_NLA_STATS_RMB_MAX = __SMC_NLA_STATS_RMB_MAX - 1 +}; + +/* SMC_NLA_STATS_SMCD_TECH and _SMCR_TECH nested attributes */ +enum { + SMC_NLA_STATS_T_PAD, + SMC_NLA_STATS_T_TX_RMB_SIZE, /* nest */ + SMC_NLA_STATS_T_RX_RMB_SIZE, /* nest */ + SMC_NLA_STATS_T_TXPLOAD_SIZE, /* nest */ + SMC_NLA_STATS_T_RXPLOAD_SIZE, /* nest */ + SMC_NLA_STATS_T_TX_RMB_STATS, /* nest */ + SMC_NLA_STATS_T_RX_RMB_STATS, /* nest */ + SMC_NLA_STATS_T_CLNT_V1_SUCC, /* u64 */ + SMC_NLA_STATS_T_CLNT_V2_SUCC, /* u64 */ + SMC_NLA_STATS_T_SRV_V1_SUCC, /* u64 */ + SMC_NLA_STATS_T_SRV_V2_SUCC, /* u64 */ + SMC_NLA_STATS_T_SENDPAGE_CNT, /* u64 */ + SMC_NLA_STATS_T_SPLICE_CNT, /* u64 */ + SMC_NLA_STATS_T_CORK_CNT, /* u64 */ + SMC_NLA_STATS_T_NDLY_CNT, /* u64 */ + SMC_NLA_STATS_T_URG_DATA_CNT, /* u64 */ + SMC_NLA_STATS_T_RX_BYTES, /* u64 */ + SMC_NLA_STATS_T_TX_BYTES, /* u64 */ + SMC_NLA_STATS_T_RX_CNT, /* u64 */ + SMC_NLA_STATS_T_TX_CNT, /* u64 */ + __SMC_NLA_STATS_T_MAX, + SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1 +}; + +/* SMC_GEN_STATS attributes */ +enum { + SMC_NLA_STATS_PAD, + SMC_NLA_STATS_SMCD_TECH, /* nest */ + SMC_NLA_STATS_SMCR_TECH, /* nest */ + SMC_NLA_STATS_CLNT_HS_ERR_CNT, /* u64 */ + SMC_NLA_STATS_SRV_HS_ERR_CNT, /* u64 */ + __SMC_NLA_STATS_MAX, + SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From f0dd7bf5e33066e554442c509ef6351728b95b51 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 16 Jun 2021 16:52:57 +0200 Subject: net/smc: Add netlink support for SMC fallback statistics Add support to collect more detailed SMC fallback reason statistics and provide these statistics to user space on the netlink interface. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index f32f11b30963..0f7f87c70baf 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -48,6 +48,7 @@ enum { SMC_NETLINK_GET_DEV_SMCD, SMC_NETLINK_GET_DEV_SMCR, SMC_NETLINK_GET_STATS, + SMC_NETLINK_GET_FBACK_STATS, }; /* SMC_GENL_FAMILY top level attributes */ @@ -60,6 +61,7 @@ enum { SMC_GEN_DEV_SMCD, /* nest */ SMC_GEN_DEV_SMCR, /* nest */ SMC_GEN_STATS, /* nest */ + SMC_GEN_FBACK_STATS, /* nest */ __SMC_GEN_MAX, SMC_GEN_MAX = __SMC_GEN_MAX - 1 }; @@ -228,4 +230,16 @@ enum { __SMC_NLA_STATS_MAX, SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1 }; + +/* SMC_GEN_FBACK_STATS attributes */ +enum { + SMC_NLA_FBACK_STATS_PAD, + SMC_NLA_FBACK_STATS_TYPE, /* u8 */ + SMC_NLA_FBACK_STATS_SRV_CNT, /* u64 */ + SMC_NLA_FBACK_STATS_CLNT_CNT, /* u64 */ + SMC_NLA_FBACK_STATS_RSN_CODE, /* u32 */ + SMC_NLA_FBACK_STATS_RSN_CNT, /* u16 */ + __SMC_NLA_FBACK_STATS_MAX, + SMC_NLA_FBACK_STATS_MAX = __SMC_NLA_FBACK_STATS_MAX - 1 +}; #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From 660a59369e1ed96dd0ff1d9d73bad5b48aa50884 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 7 Jun 2021 23:25:44 -0500 Subject: RDMA/rxe: Add bind MW fields to rxe_send_wr Add fields to struct rxe_send_wr in rdma_user_rxe.h to support bind MW work requests Link: https://lore.kernel.org/r/20210608042552.33275-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_rxe.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index 068433e2229d..e283c2220aba 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -99,7 +99,16 @@ struct rxe_send_wr { __u32 remote_qkey; __u16 pkey_index; } ud; + struct { + __aligned_u64 addr; + __aligned_u64 length; + __u32 mr_lkey; + __u32 mw_rkey; + __u32 rkey; + __u32 access; + } mw; /* reg is only used by the kernel and is not part of the uapi */ +#ifdef __KERNEL__ struct { union { struct ib_mr *mr; @@ -108,6 +117,7 @@ struct rxe_send_wr { __u32 key; __u32 access; } reg; +#endif } wr; }; -- cgit v1.2.3 From 836382dc24717af203ce06703530528827086955 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 16 Jun 2021 22:25:05 +0200 Subject: netfilter: nf_tables: add last expression Add a new optional expression that tells you when last matching on a given rule / set element element has happened. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 19715e2679d1..e94d1fa554cb 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1195,6 +1195,21 @@ enum nft_counter_attributes { }; #define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) +/** + * enum nft_last_attributes - nf_tables last expression netlink attributes + * + * @NFTA_LAST_SET: last update has been set, zero means never updated (NLA_U32) + * @NFTA_LAST_MSECS: milliseconds since last update (NLA_U64) + */ +enum nft_last_attributes { + NFTA_LAST_UNSPEC, + NFTA_LAST_SET, + NFTA_LAST_MSECS, + NFTA_LAST_PAD, + __NFTA_LAST_MAX +}; +#define NFTA_LAST_MAX (__NFTA_LAST_MAX - 1) + /** * enum nft_log_attributes - nf_tables log expression netlink attributes * -- cgit v1.2.3 From fe76421d1da1dcdb3a2cd8428ac40106bff28bc0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Jun 2021 10:19:54 -0600 Subject: io_uring: allow user configurable IO thread CPU affinity io-wq defaults to per-node masks for IO workers. This works fine by default, but isn't particularly handy for workloads that prefer more specific affinities, for either performance or isolation reasons. This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU mask that is then applied to IO thread workers, and an IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the default of per-node. Note that no care is given to existing IO threads, they will need to go through a reschedule before the affinity is correct if they are already running or sleeping. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 162ff99ed2cb..f1f9ac114b51 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -306,6 +306,10 @@ enum { IORING_REGISTER_BUFFERS2 = 15, IORING_REGISTER_BUFFERS_UPDATE = 16, + /* set/clear io-wq thread affinities */ + IORING_REGISTER_IOWQ_AFF = 17, + IORING_UNREGISTER_IOWQ_AFF = 18, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 644f706719f0297bc5f65c8891de1c32f042eae5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 21 May 2021 11:51:36 +0200 Subject: KVM: x86: hyper-v: Introduce KVM_CAP_HYPERV_ENFORCE_CPUID Modeled after KVM_CAP_ENFORCE_PV_FEATURE_CPUID, the new capability allows for limiting Hyper-V features to those exposed to the guest in Hyper-V CPUIDs (0x40000003, 0x40000004, ...). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20210521095204.2161214-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..792816144092 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 6dba940352038b56db9b591b172fb2ec76a5fd5e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 7 Jun 2021 12:02:02 +0300 Subject: KVM: x86: Introduce KVM_GET_SREGS2 / KVM_SET_SREGS2 This is a new version of KVM_GET_SREGS / KVM_SET_SREGS. It has the following changes: * Has flags for future extensions * Has vcpu's PDPTRs, allowing to save/restore them on migration. * Lacks obsolete interrupt bitmap (done now via KVM_SET_VCPU_EVENTS) New capability, KVM_CAP_SREGS2 is added to signal the userspace of this ioctl. Signed-off-by: Maxim Levitsky Message-Id: <20210607090203.133058-8-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 792816144092..90d44138dbfb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1084,6 +1084,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 #define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 #ifdef KVM_CAP_IRQ_ROUTING @@ -1622,6 +1623,9 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + struct kvm_xen_vcpu_attr { __u16 type; __u16 pad[3]; -- cgit v1.2.3 From 0dbb11230437895f7cd6fc55da61cef011e997d8 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Tue, 8 Jun 2021 18:05:43 +0000 Subject: KVM: X86: Introduce KVM_HC_MAP_GPA_RANGE hypercall This hypercall is used by the SEV guest to notify a change in the page encryption status to the hypervisor. The hypercall should be invoked only when the encryption attribute is changed from encrypted -> decrypted and vice versa. By default all guest pages are considered encrypted. The hypercall exits to userspace to manage the guest shared regions and integrate with the userspace VMM's migration code. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Paolo Bonzini Cc: Joerg Roedel Cc: Borislav Petkov Cc: Tom Lendacky Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Steve Rutherford Signed-off-by: Brijesh Singh Signed-off-by: Ashish Kalra Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Co-developed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini Message-Id: <90778988e1ee01926ff9cac447aacb745f954c8c.1623174621.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + include/uapi/linux/kvm_para.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 90d44138dbfb..9febe1412f7a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1085,6 +1085,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PTP_KVM 198 #define KVM_CAP_HYPERV_ENFORCE_CPUID 199 #define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index 8b86609849b9..960c7e93d1a9 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -29,6 +29,7 @@ #define KVM_HC_CLOCK_PAIRING 9 #define KVM_HC_SEND_IPI 10 #define KVM_HC_SCHED_YIELD 11 +#define KVM_HC_MAP_GPA_RANGE 12 /* * hypercalls use architecture specific -- cgit v1.2.3 From 2d8ea148e553e1dd4e80a87741abdfb229e2b323 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 17 Jun 2021 11:37:11 +0800 Subject: net: fix mistake path for netdev_features_strings Th_strings arrays netdev_features_strings, tunable_strings, and phy_tunable_strings has been moved to file net/ethtool/common.c. So fixes the comment. Signed-off-by: Jian Shen Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cfef6b08169a..67aa7134b301 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -233,7 +233,7 @@ enum tunable_id { ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tunable attribute above and remember to update - * tunable_strings[] in net/core/ethtool.c + * tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_TUNABLE_COUNT, }; @@ -297,7 +297,7 @@ enum phy_tunable_id { ETHTOOL_PHY_EDPD, /* * Add your fresh new phy tunable attribute above and remember to update - * phy_tunable_strings[] in net/core/ethtool.c + * phy_tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_PHY_TUNABLE_COUNT, }; -- cgit v1.2.3 From 68f5d3f3b6543266b29e047cfaf9842333019b4c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 5 Mar 2021 13:19:58 +0100 Subject: um: add PCI over virtio emulation driver To support testing of PCI/PCIe drivers in UML, add a PCI bus support driver. This driver uses virtio, which in UML is really just vhost-user, to talk to devices, and adds the devices to the virtual PCI bus in the system. Since virtio already allows DMA/bus mastering this really isn't all that hard, of course we need the logic_iomem infrastructure that was added by a previous patch. The protocol to talk to the device is has a few fairly simple messages for reading to/writing from config and IO spaces, and messages for the device to send the various interrupts (INT#, MSI/MSI-X and while suspended PME#). Note that currently no offical virtio device ID is assigned for this protocol, as a consequence this patch requires defining it in the Kconfig, with a default that makes the driver refuse to work at all. Finally, in order to add support for MSI/MSI-X interrupts, some small changes are needed in the UML IRQ code, it needs to have more interrupts, changing NR_IRQS from 64 to 128 if this driver is enabled, but not actually use them for anything so that the generic IRQ domain/MSI infrastructure can allocate IRQ numbers. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- include/uapi/linux/virtio_pcidev.h | 64 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 include/uapi/linux/virtio_pcidev.h (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h new file mode 100644 index 000000000000..89daa88bcfef --- /dev/null +++ b/include/uapi/linux/virtio_pcidev.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg + */ +#ifndef _UAPI_LINUX_VIRTIO_PCIDEV_H +#define _UAPI_LINUX_VIRTIO_PCIDEV_H +#include + +/** + * enum virtio_pcidev_ops - virtual PCI device operations + * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; + * the @data field should be filled in by the device (in little endian). + * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; + * the @data field contains the data to write (in little endian). + * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable; + * the @data field should be filled in by the device (in little endian). + * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable; + * the @data field contains the data to write (in little endian). + * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but + * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) + * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for + * the number + * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports + * the 16- or 32-bit write that would otherwise be done into memory, + * analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above + * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be + * all zeroes) to signal the PME# pin. + */ +enum virtio_pcidev_ops { + VIRTIO_PCIDEV_OP_RESERVED = 0, + VIRTIO_PCIDEV_OP_CFG_READ, + VIRTIO_PCIDEV_OP_CFG_WRITE, + VIRTIO_PCIDEV_OP_MMIO_READ, + VIRTIO_PCIDEV_OP_MMIO_WRITE, + VIRTIO_PCIDEV_OP_MMIO_MEMSET, + VIRTIO_PCIDEV_OP_INT, + VIRTIO_PCIDEV_OP_MSI, + VIRTIO_PCIDEV_OP_PME, +}; + +/** + * struct virtio_pcidev_msg - virtio PCI device operation + * @op: the operation to do + * @bar: the bar (only with BAR read/write messages) + * @reserved: reserved + * @size: the size of the read/write (in bytes) + * @addr: the address to read/write + * @data: the data, normally @size long, but just one byte for + * %VIRTIO_PCIDEV_OP_MMIO_MEMSET + * + * Note: the fields are all in native (CPU) endian, however, the + * @data values will often be in little endian (see the ops above.) + */ +struct virtio_pcidev_msg { + __u8 op; + __u8 bar; + __u16 reserved; + __u32 size; + __u64 addr; + __u8 data[]; +}; + +#endif /* _UAPI_LINUX_VIRTIO_PCIDEV_H */ -- cgit v1.2.3 From 8e8125f192288802267157f613c0ca654dfbde8e Mon Sep 17 00:00:00 2001 From: Yuri Nudelman Date: Tue, 25 May 2021 14:49:52 +0300 Subject: habanalabs: add debug flag to prevent failure on timeout Sometimes it is useful to allow the command to continue running despite the timeout occurred, to differentiate between really stuck or just very time consuming commands. This can be achieved by passing a new debug flag alongside the cs, HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT. Anyway, if the timeout occurred, a warning print shall be issued, however this shall not fail the submission. Signed-off-by: Yuri Nudelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6d2d34c9f375..a47485a8d411 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -664,6 +664,7 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 #define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100 #define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 +#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 #define HL_CS_STATUS_SUCCESS 0 -- cgit v1.2.3 From e307b302be8beb7fb59aa16621d5081b69397076 Mon Sep 17 00:00:00 2001 From: Yuri Nudelman Date: Mon, 24 May 2021 11:25:21 +0300 Subject: habanalabs: added open_stats info ioctl In a system with multiple ASICs, there is a need to provide monitoring tools with information on how long a device was opened and how many times a device was opened. Therefore, we add a new opcode to the INFO ioctl to provide that information. Signed-off-by: Yuri Nudelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a47485a8d411..a47a731e4527 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -313,6 +313,7 @@ enum hl_device_status { * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -331,6 +332,7 @@ enum hl_device_status { #define HL_INFO_TOTAL_ENERGY 15 #define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_POWER 17 +#define HL_INFO_OPEN_STATS 18 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -444,6 +446,16 @@ struct hl_pll_frequency_info { __u16 output[HL_PLL_NUM_OUTPUTS]; }; +/** + * struct hl_open_stats_info - device open statistics information + * @open_counter: ever growing counter, increased on each successful dev open + * @last_open_period_ms: duration (ms) device was open last time + */ +struct hl_open_stats_info { + __u64 open_counter; + __u64 last_open_period_ms; +}; + /** * struct hl_power_info - power information * @power: power consumption -- cgit v1.2.3 From 8b532109bf885b7b59b93487bc4672eb6d071b78 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Thu, 17 Jun 2021 19:16:44 +0200 Subject: seg6: add support for SRv6 End.DT46 Behavior IETF RFC 8986 [1] includes the definition of SRv6 End.DT4, End.DT6, and End.DT46 Behaviors. The current SRv6 code in the Linux kernel only implements End.DT4 and End.DT6 which can be used respectively to support IPv4-in-IPv6 and IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. The proposed End.DT46 implementation is meant to support the decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. The implementation of the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies the setup and operations of SRv6 VPNs. The SRv6 End.DT46 Behavior leverages the infrastructure of SRv6 End.DT{4,6} Behaviors implemented so far, because it makes use of a VRF device in order to force the routing lookup into the associated routing table. To make the End.DT46 work properly, it must be guaranteed that the routing table used for routing lookup operations is bound to one and only one VRF during the tunnel creation. Such constraint has to be enforced by enabling the VRF strict_mode sysctl parameter, i.e.: $ sysctl -wq net.vrf.strict_mode=1 Note that the same approach is used for the SRv6 End.DT4 Behavior and for the End.DT6 Behavior in VRF mode. The command used to instantiate an SRv6 End.DT46 Behavior is straightforward, i.e.: $ ip -6 route add 2001:db8::1 encap seg6local action End.DT46 vrftable 100 dev vrf100. [1] https://www.rfc-editor.org/rfc/rfc8986.html#name-enddt46-decapsulation-and-s ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Performance and impact of SRv6 End.DT46 Behavior on the SRv6 Networking ======================================================================= This patch aims to add the SRv6 End.DT46 Behavior with minimal impact on the performance of SRv6 End.DT4 and End.DT6 Behaviors. In order to verify this, we tested the performance of the newly introduced SRv6 End.DT46 Behavior and compared it with the performance of SRv6 End.DT{4,6} Behaviors, considering both the patched kernel and the kernel before applying the End.DT46 patch (referred to as vanilla kernel). In details, the following decapsulation scenarios were considered: 1.a) IPv6 traffic in SRv6 End.DT46 Behavior on patched kernel; 1.b) IPv4 traffic in SRv6 End.DT46 Behavior on patched kernel; 2.a) SRv6 End.DT6 Behavior (VRF mode) on patched kernel; 2.b) SRv6 End.DT4 Behavior on patched kernel; 3.a) SRv6 End.DT6 Behavior (VRF mode) on vanilla kernel (without the End.DT46 patch); 3.b) SRv6 End.DT4 Behavior on vanilla kernel (without the End.DT46 patch). All tests were performed on a testbed deployed on the CloudLab [2] facilities. We considered IPv{4,6} traffic handled by a single core (at 2.4 GHz on a Xeon(R) CPU E5-2630 v3) on kernel 5.13-rc1 using packets of size ~ 100 bytes. Scenario (1.a): average 684.70 kpps; std. dev. 0.7 kpps; Scenario (1.b): average 711.69 kpps; std. dev. 1.2 kpps; Scenario (2.a): average 690.70 kpps; std. dev. 1.2 kpps; Scenario (2.b): average 722.22 kpps; std. dev. 1.7 kpps; Scenario (3.a): average 690.02 kpps; std. dev. 2.6 kpps; Scenario (3.b): average 721.91 kpps; std. dev. 1.2 kpps; Considering the results for the patched kernel (1.a, 1.b, 2.a, 2.b) we observe that the performance degradation incurred in using End.DT46 rather than End.DT6 and End.DT4 respectively for IPv6 and IPv4 traffic is minimal, around 0.9% and 1.5%. Such very minimal performance degradation is the price to be paid if one prefers to use a single tunnel capable of handling both types of traffic (IPv4 and IPv6). Comparing the results for End.DT4 and End.DT6 under the patched and the vanilla kernel (2.a, 2.b, 3.a, 3.b) we observe that the introduction of the End.DT46 patch has no impact on the performance of End.DT4 and End.DT6. [2] https://www.cloudlab.us Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/seg6_local.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 5ae3ace84de0..332b18f318f8 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -64,6 +64,8 @@ enum { SEG6_LOCAL_ACTION_END_AM = 14, /* custom BPF action */ SEG6_LOCAL_ACTION_END_BPF = 15, + /* decap and lookup of DA in v4 or v6 table */ + SEG6_LOCAL_ACTION_END_DT46 = 16, __SEG6_LOCAL_ACTION_MAX, }; -- cgit v1.2.3 From 752e906732c69412087f716e93baa0330cb7cce3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:07 -0700 Subject: mptcp: add csum_enabled in mptcp_sock This patch added a new member named csum_enabled in struct mptcp_sock, used a dummy mptcp_is_checksum_enabled() helper to initialize it. Also added a new member named mptcpi_csum_enabled in struct mptcp_info to expose the csum_enabled flag. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 8eb3c0844bff..7b05f7102321 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -105,6 +105,7 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; }; /* -- cgit v1.2.3 From 321827477360934dc040e9d3c626bf1de6c3ab3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 18 Jun 2021 13:04:35 +0200 Subject: icmp: don't send out ICMP messages with a source address of 0.0.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When constructing ICMP response messages, the kernel will try to pick a suitable source address for the outgoing packet. However, if no IPv4 addresses are configured on the system at all, this will fail and we end up producing an ICMP message with a source address of 0.0.0.0. This can happen on a box routing IPv4 traffic via v6 nexthops, for instance. Since 0.0.0.0 is not generally routable on the internet, there's a good chance that such ICMP messages will never make it back to the sender of the original packet that the ICMP message was sent in response to. This, in turn, can create connectivity and PMTUd problems for senders. Fortunately, RFC7600 reserves a dummy address to be used as a source for ICMP messages (192.0.0.8/32), so let's teach the kernel to substitute that address as a last resort if the regular source address selection procedure fails. Below is a quick example reproducing this issue with network namespaces: ip netns add ns0 ip l add type veth peer netns ns0 ip l set dev veth0 up ip a add 10.0.0.1/24 dev veth0 ip a add fc00:dead:cafe:42::1/64 dev veth0 ip r add 10.1.0.0/24 via inet6 fc00:dead:cafe:42::2 ip -n ns0 l set dev veth0 up ip -n ns0 a add fc00:dead:cafe:42::2/64 dev veth0 ip -n ns0 r add 10.0.0.0/24 via inet6 fc00:dead:cafe:42::1 ip netns exec ns0 sysctl -w net.ipv4.icmp_ratelimit=0 ip netns exec ns0 sysctl -w net.ipv4.ip_forward=1 tcpdump -tpni veth0 -c 2 icmp & ping -w 1 10.1.0.1 > /dev/null tcpdump: verbose output suppressed, use -v[v]... for full protocol decode listening on veth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 29, seq 1, length 64 IP 0.0.0.0 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92 2 packets captured 2 packets received by filter 0 packets dropped by kernel With this patch the above capture changes to: IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 31127, seq 1, length 64 IP 192.0.0.8 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Juliusz Chroboczek Reviewed-by: David Ahern Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 7d6687618d80..d1b327036ae4 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -289,6 +289,9 @@ struct sockaddr_in { /* Address indicating an error return. */ #define INADDR_NONE ((unsigned long int) 0xffffffff) +/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ +#define INADDR_DUMMY ((unsigned long int) 0xc0000008) + /* Network number for local host loopback. */ #define IN_LOOPBACKNET 127 -- cgit v1.2.3 From 1f3c98eaddec857e16a7a1c6cd83317b3dc89438 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Fri, 18 Jun 2021 22:32:47 +0800 Subject: net: add pf_family_names[] for protocol family Modify the pr_info content from int to char *, this looks more readable. Signed-off-by: Yejune Deng Signed-off-by: David S. Miller --- include/uapi/linux/net.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h index 4dabec6bd957..a28caaf620c7 100644 --- a/include/uapi/linux/net.h +++ b/include/uapi/linux/net.h @@ -55,4 +55,52 @@ typedef enum { #define __SO_ACCEPTCON (1 << 16) /* performed a listen */ +static const char * const pf_family_names[] = { + [PF_UNSPEC] = "PF_UNSPEC", + [PF_UNIX] = "PF_UNIX/PF_LOCAL", + [PF_INET] = "PF_INET", + [PF_AX25] = "PF_AX25", + [PF_IPX] = "PF_IPX", + [PF_APPLETALK] = "PF_APPLETALK", + [PF_NETROM] = "PF_NETROM", + [PF_BRIDGE] = "PF_BRIDGE", + [PF_ATMPVC] = "PF_ATMPVC", + [PF_X25] = "PF_X25", + [PF_INET6] = "PF_INET6", + [PF_ROSE] = "PF_ROSE", + [PF_DECnet] = "PF_DECnet", + [PF_NETBEUI] = "PF_NETBEUI", + [PF_SECURITY] = "PF_SECURITY", + [PF_KEY] = "PF_KEY", + [PF_NETLINK] = "PF_NETLINK/PF_ROUTE", + [PF_PACKET] = "PF_PACKET", + [PF_ASH] = "PF_ASH", + [PF_ECONET] = "PF_ECONET", + [PF_ATMSVC] = "PF_ATMSVC", + [PF_RDS] = "PF_RDS", + [PF_SNA] = "PF_SNA", + [PF_IRDA] = "PF_IRDA", + [PF_PPPOX] = "PF_PPPOX", + [PF_WANPIPE] = "PF_WANPIPE", + [PF_LLC] = "PF_LLC", + [PF_IB] = "PF_IB", + [PF_MPLS] = "PF_MPLS", + [PF_CAN] = "PF_CAN", + [PF_TIPC] = "PF_TIPC", + [PF_BLUETOOTH] = "PF_BLUETOOTH", + [PF_IUCV] = "PF_IUCV", + [PF_RXRPC] = "PF_RXRPC", + [PF_ISDN] = "PF_ISDN", + [PF_PHONET] = "PF_PHONET", + [PF_IEEE802154] = "PF_IEEE802154", + [PF_CAIF] = "PF_CAIF", + [PF_ALG] = "PF_ALG", + [PF_NFC] = "PF_NFC", + [PF_VSOCK] = "PF_VSOCK", + [PF_KCM] = "PF_KCM", + [PF_QIPCRTR] = "PF_QIPCRTR", + [PF_SMC] = "PF_SMC", + [PF_XDP] = "PF_XDP", +}; + #endif /* _UAPI_LINUX_NET_H */ -- cgit v1.2.3 From 103ebe658a262ef5b5db7f01d83857cf82a087d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jun 2021 13:02:45 -0700 Subject: Revert "net: add pf_family_names[] for protocol family" This reverts commit 1f3c98eaddec857e16a7a1c6cd83317b3dc89438. Does not build... Signed-off-by: David S. Miller --- include/uapi/linux/net.h | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h index a28caaf620c7..4dabec6bd957 100644 --- a/include/uapi/linux/net.h +++ b/include/uapi/linux/net.h @@ -55,52 +55,4 @@ typedef enum { #define __SO_ACCEPTCON (1 << 16) /* performed a listen */ -static const char * const pf_family_names[] = { - [PF_UNSPEC] = "PF_UNSPEC", - [PF_UNIX] = "PF_UNIX/PF_LOCAL", - [PF_INET] = "PF_INET", - [PF_AX25] = "PF_AX25", - [PF_IPX] = "PF_IPX", - [PF_APPLETALK] = "PF_APPLETALK", - [PF_NETROM] = "PF_NETROM", - [PF_BRIDGE] = "PF_BRIDGE", - [PF_ATMPVC] = "PF_ATMPVC", - [PF_X25] = "PF_X25", - [PF_INET6] = "PF_INET6", - [PF_ROSE] = "PF_ROSE", - [PF_DECnet] = "PF_DECnet", - [PF_NETBEUI] = "PF_NETBEUI", - [PF_SECURITY] = "PF_SECURITY", - [PF_KEY] = "PF_KEY", - [PF_NETLINK] = "PF_NETLINK/PF_ROUTE", - [PF_PACKET] = "PF_PACKET", - [PF_ASH] = "PF_ASH", - [PF_ECONET] = "PF_ECONET", - [PF_ATMSVC] = "PF_ATMSVC", - [PF_RDS] = "PF_RDS", - [PF_SNA] = "PF_SNA", - [PF_IRDA] = "PF_IRDA", - [PF_PPPOX] = "PF_PPPOX", - [PF_WANPIPE] = "PF_WANPIPE", - [PF_LLC] = "PF_LLC", - [PF_IB] = "PF_IB", - [PF_MPLS] = "PF_MPLS", - [PF_CAN] = "PF_CAN", - [PF_TIPC] = "PF_TIPC", - [PF_BLUETOOTH] = "PF_BLUETOOTH", - [PF_IUCV] = "PF_IUCV", - [PF_RXRPC] = "PF_RXRPC", - [PF_ISDN] = "PF_ISDN", - [PF_PHONET] = "PF_PHONET", - [PF_IEEE802154] = "PF_IEEE802154", - [PF_CAIF] = "PF_CAIF", - [PF_ALG] = "PF_ALG", - [PF_NFC] = "PF_NFC", - [PF_VSOCK] = "PF_VSOCK", - [PF_KCM] = "PF_KCM", - [PF_QIPCRTR] = "PF_QIPCRTR", - [PF_SMC] = "PF_SMC", - [PF_XDP] = "PF_XDP", -}; - #endif /* _UAPI_LINUX_NET_H */ -- cgit v1.2.3 From 879740517daba2a1d3229f8a54fc2b1cb78a4f07 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 17 Jun 2021 01:58:17 +0530 Subject: RDMA/bnxt_re: Update ABI to pass wqe-mode to user space Changing ucontext ABI response structure to pass wqe_mode to user library. A flag in comp_mask has been set to indicate presence of wqe_mode. Moved wqe-mode ABI to uapi/rdma/bnxt_re-abi.h Link: https://lore.kernel.org/r/20210616202817.1185276-1-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index dc52e3cf574c..b1de99bf56ce 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -49,7 +49,14 @@ #define BNXT_RE_CHIP_ID0_CHIP_MET_SFT 0x18 enum { - BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL + BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL, + BNXT_RE_UCNTX_CMASK_HAVE_MODE = 0x02ULL, +}; + +enum bnxt_re_wqe_mode { + BNXT_QPLIB_WQE_MODE_STATIC = 0x00, + BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01, + BNXT_QPLIB_WQE_MODE_INVALID = 0x02, }; struct bnxt_re_uctx_resp { @@ -62,6 +69,8 @@ struct bnxt_re_uctx_resp { __aligned_u64 comp_mask; __u32 chip_id0; __u32 chip_id1; + __u32 mode; + __u32 rsvd1; /* padding */ }; /* -- cgit v1.2.3 From 2d82ab251ef0f6e7716279b04e9b5a01a86ca530 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 20 May 2021 17:46:54 +0200 Subject: virtiofs: propagate sync() to file server Even if POSIX doesn't mandate it, linux users legitimately expect sync() to flush all data and metadata to physical storage when it is located on the same system. This isn't happening with virtiofs though: sync() inside the guest returns right away even though data still needs to be flushed from the host page cache. This is easily demonstrated by doing the following in the guest: $ dd if=/dev/zero of=/mnt/foo bs=1M count=5K ; strace -T -e sync sync 5120+0 records in 5120+0 records out 5368709120 bytes (5.4 GB, 5.0 GiB) copied, 5.22224 s, 1.0 GB/s sync() = 0 <0.024068> and start the following in the host when the 'dd' command completes in the guest: $ strace -T -e fsync /usr/bin/sync virtiofs/foo fsync(3) = 0 <10.371640> There are no good reasons not to honor the expected behavior of sync() actually: it gives an unrealistic impression that virtiofs is super fast and that data has safely landed on HW, which isn't the case obviously. Implement a ->sync_fs() superblock operation that sends a new FUSE_SYNCFS request type for this purpose. Provision a 64-bit placeholder for possible future extensions. Since the file server cannot handle the wait == 0 case, we skip it to avoid a gratuitous roundtrip. Note that this is per-superblock: a FUSE_SYNCFS is send for the root mount and for each submount. Like with FUSE_FSYNC and FUSE_FSYNCDIR, lack of support for FUSE_SYNCFS in the file server is treated as permanent success. This ensures compatibility with older file servers: the client will get the current behavior of sync() not being propagated to the file server. Note that such an operation allows the file server to DoS sync(). Since a typical FUSE file server is an untrusted piece of software running in userspace, this is disabled by default. Only enable it with virtiofs for now since virtiofsd is supposedly trusted by the guest kernel. Reported-by: Robert Krawitz Signed-off-by: Greg Kurz Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 271ae90a9bb7..36ed092227fa 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -181,6 +181,9 @@ * - add FUSE_OPEN_KILL_SUIDGID * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT * - add FUSE_SETXATTR_ACL_KILL_SGID + * + * 7.34 + * - add FUSE_SYNCFS */ #ifndef _LINUX_FUSE_H @@ -216,7 +219,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 33 +#define FUSE_KERNEL_MINOR_VERSION 34 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -509,6 +512,7 @@ enum fuse_opcode { FUSE_COPY_FILE_RANGE = 47, FUSE_SETUPMAPPING = 48, FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -971,4 +975,8 @@ struct fuse_removemapping_one { #define FUSE_REMOVEMAPPING_MAX_ENTRY \ (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) +struct fuse_syncfs_in { + uint64_t padding; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From 1a9fd4172d5c8ba64735b3aef7eed643d398ce05 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 21 May 2021 17:42:23 +0200 Subject: btrfs: fix typos in comments Fix typos that have snuck in since the last round. Found by codespell. Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 4 ++-- include/uapi/linux/btrfs_tree.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5df73001aad4..22cd037123fa 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -154,7 +154,7 @@ struct btrfs_scrub_progress { __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ __u64 read_errors; /* # of read errors encountered (EIO) */ __u64 csum_errors; /* # of failed csum checks */ - __u64 verify_errors; /* # of occurences, where the metadata + __u64 verify_errors; /* # of occurrences, where the metadata * of a tree block did not match the * expected values, like generation or * logical */ @@ -174,7 +174,7 @@ struct btrfs_scrub_progress { __u64 last_physical; /* last physical address scrubbed. In * case a scrub was aborted, this can * be used to restart the scrub */ - __u64 unverified_errors; /* # of occurences where a read for a + __u64 unverified_errors; /* # of occurrences where a read for a * full (64k) bio failed, but the re- * check succeeded for each 4k piece. * Intermittent error. */ diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 58d7cff9afb1..ccdb40fe40dc 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -59,7 +59,7 @@ /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL -/* orhpan objectid for tracking unlinked/truncated files */ +/* orphan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL /* does write ahead logging to speed up fsyncs */ @@ -275,7 +275,7 @@ #define BTRFS_PERSISTENT_ITEM_KEY 249 /* - * Persistantly stores the device replace state in the device tree. + * Persistently stores the device replace state in the device tree. * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). */ #define BTRFS_DEV_REPLACE_KEY 250 -- cgit v1.2.3 From ea7fc1bb1cd1b92b42b1d9273ce7e231d3dc9321 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:12 +0100 Subject: KVM: arm64: Introduce MTE VM feature Add a new VM feature 'KVM_ARM_CAP_MTE' which enables memory tagging for a VM. This will expose the feature to the guest and automatically tag memory pages touched by the VM as PG_mte_tagged (and clear the tag storage) to ensure that the guest cannot see stale tags, and so that the tags are correctly saved/restored across swap. Actually exposing the new capability to user space happens in a later patch. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price [maz: move VM_SHARED sampling into the critical section] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..d4da58ddcad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_ARM_MTE 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f0376edb1ddcab19a473b4bf1fbd5b6bbed3705b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:15 +0100 Subject: KVM: arm64: Add ioctl to fetch/store tags in a guest The VMM may not wish to have it's own mapping of guest memory mapped with PROT_MTE because this causes problems if the VMM has tag checking enabled (the guest controls the tags in physical RAM and it's unlikely the tags are correct for the VMM). Instead add a new ioctl which allows the VMM to easily read/write the tags from guest memory, allowing the VMM's mapping to be non-PROT_MTE while the VMM can still read/write the tags for the purpose of migration. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-6-steven.price@arm.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d4da58ddcad7..da1edd2b4046 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1429,6 +1429,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From b87cc116c7e1bc62a84d8c46acd401db179edb11 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 21 Jun 2021 14:20:02 +0530 Subject: KVM: PPC: Book3S HV: Add KVM_CAP_PPC_RPT_INVALIDATE capability Now that we have H_RPT_INVALIDATE fully implemented, enable support for the same via KVM_CAP_PPC_RPT_INVALIDATE KVM capability Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210621085003.904767-6-bharata@linux.ibm.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3fd9a7e9d90c..613198a94c43 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1082,6 +1082,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_PPC_RPT_INVALIDATE 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From bf22a6976897977b0a3f1aeba6823c959fc4fdae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Apr 2021 21:44:23 +0200 Subject: futex: Provide FUTEX_LOCK_PI2 to support clock selection The FUTEX_LOCK_PI futex operand uses a CLOCK_REALTIME based absolute timeout since it was implemented, but it does not require that the FUTEX_CLOCK_REALTIME flag is set, because that was introduced later. In theory as none of the user space implementations can set the FUTEX_CLOCK_REALTIME flag on this operand, it would be possible to creatively abuse it and make the meaning invers, i.e. select CLOCK_REALTIME when not set and CLOCK_MONOTONIC when set. But that's a nasty hackery. Another option would be to have a new FUTEX_CLOCK_MONOTONIC flag only for FUTEX_LOCK_PI, but that's also awkward because it does not allow libraries to handle the timeout clock selection consistently. So provide a new FUTEX_LOCK_PI2 operand which implements the timeout semantics which the other operands use and leave FUTEX_LOCK_PI alone. Reported-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210422194705.440773992@linutronix.de --- include/uapi/linux/futex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index a89eb0accd5e..235e5b2facaa 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -21,6 +21,7 @@ #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 +#define FUTEX_LOCK_PI2 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -32,6 +33,7 @@ #define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_LOCK_PI2_PRIVATE (FUTEX_LOCK_PI2 | FUTEX_PRIVATE_FLAG) #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG) -- cgit v1.2.3 From 6d33cabf2baf304730d01a942095416b3a8329ab Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Jun 2021 13:26:15 -0700 Subject: RDMA/core: Use flexible array for mad data In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally reading across neighboring array fields. Without a flexible array, this looks like an attempt to perform a memcpy() read beyond the end of the packet->mad.data array: drivers/infiniband/core/user_mad.c: memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); Switch from [0] to [] to use the appropriately handled type for trailing bytes. Link: https://lore.kernel.org/r/20210616202615.1247242-1-keescook@chromium.org Signed-off-by: Kees Cook Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_mad.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h index 90c0cf228020..10b5f6a4c677 100644 --- a/include/uapi/rdma/ib_user_mad.h +++ b/include/uapi/rdma/ib_user_mad.h @@ -143,7 +143,7 @@ struct ib_user_mad_hdr { */ struct ib_user_mad { struct ib_user_mad_hdr hdr; - __aligned_u64 data[0]; + __aligned_u64 data[]; }; /* -- cgit v1.2.3 From 336529518e9724d4cecabc622e57bcdce02e7c61 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Wed, 16 Jun 2021 10:57:39 +0300 Subject: RDMA/mlx5: Support real-time timestamp directly from the device Currently, if the user asks for a real-time timestamp, the device will return a free-running one, and the timestamp will be translated to real-time in the user-space. When the device supports only real-time timestamp and not free-running, the creation of the QP will fail even though the user needs supported the real-time one. To prevent this, we will return the real-time timestamp directly from the device. Link: https://lore.kernel.org/r/c6cfc8e6f038575c5c2de6505830f7e74e4de80d.1623829775.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5-abi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 27905a0268c9..82e3bc1eb57d 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -101,6 +101,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS = 1UL << 4, }; enum mlx5_user_cmds_supp_uhw { @@ -270,6 +271,7 @@ struct mlx5_ib_query_device_resp { enum mlx5_ib_create_cq_flags { MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD = 1 << 0, MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX = 1 << 1, + MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS = 1 << 2, }; struct mlx5_ib_create_cq { -- cgit v1.2.3 From 913d026fbfaf114ff87afcc77fa4e9309f87f114 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 22 Jun 2021 09:50:47 +0300 Subject: ethtool: Document correct attribute type 'ETHTOOL_A_MODULE_EEPROM_DATA' is a binary attribute, not a nested one. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 825cfda1c5d5..c7135c9c37a5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -675,7 +675,7 @@ enum { ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* nested */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ __ETHTOOL_A_MODULE_EEPROM_CNT, ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) -- cgit v1.2.3 From 3190b649b4d9391be7bde3edd8e924e451c5d2f6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 22 Jun 2021 14:04:49 -0400 Subject: sctp: add SCTP_PLPMTUD_PROBE_INTERVAL sockopt for sock/asoc/transport With this socket option, users can change probe_interval for a transport, asoc or sock after it's created. Note that if the change is for an asoc, also apply the change to each transport in this asoc. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index cb78e7a739da..c4ff1ebd8bcc 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -141,6 +141,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131 #define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE #define SCTP_REMOTE_UDP_ENCAPS_PORT 132 +#define SCTP_PLPMTUD_PROBE_INTERVAL 133 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1213,4 +1214,11 @@ enum sctp_sched_type { SCTP_SS_MAX = SCTP_SS_RR }; +/* Probe Interval socket option */ +struct sctp_probeinterval { + sctp_assoc_t spi_assoc_id; + struct sockaddr_storage spi_address; + __u32 spi_interval; +}; + #endif /* _UAPI_SCTP_H */ -- cgit v1.2.3 From dd3e4fc75b4ab8186a133cfe9d49666a2f8186e0 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 18 Jun 2021 13:41:36 +0300 Subject: nl80211/cfg80211: add BSS color to NDP ranging parameters In NDP ranging, the initiator need to set the BSS color in the NDP to the BSS color of the responder. Add the BSS color as a parameter for NDP ranging. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210618133832.f097a6144b59.I27dec8b994df52e691925ea61be4dd4fa6d396c0@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f962c06e9818..771f238ccff1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -6912,6 +6912,9 @@ enum nl80211_peer_measurement_ftm_capa { * @NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK: negotiate for LMR feedback. Only * valid if either %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED or * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set. + * @NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR: optional. The BSS color of the + * responder. Only valid if %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED + * or %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED is set. * * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number @@ -6931,6 +6934,7 @@ enum nl80211_peer_measurement_ftm_req { NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED, NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED, NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK, + NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR, /* keep last */ NUM_NL80211_PMSR_FTM_REQ_ATTR, -- cgit v1.2.3 From f4f8650588d35deafaa4a4e28cceb3557a71e711 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 18 Jun 2021 13:41:52 +0300 Subject: cfg80211: allow advertising vendor-specific capabilities There may be cases where vendor-specific elements need to be used over the air. Rather than have driver or firmware add them and possibly cause problems that way, add them to the iftype-data band capabilities. This way we can advertise to userspace first, and use them in mac80211 next. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210618133832.e8c4f0347276.Iee5964682b3e9ec51fc1cd57a7c62383eaf6ddd7@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 771f238ccff1..db474994fa73 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3654,6 +3654,8 @@ enum nl80211_mpath_info { * defined * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), * given for all 6 GHz band channels + * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are + * advertised on this band/for this iftype (binary) * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use */ enum nl80211_band_iftype_attr { @@ -3665,6 +3667,7 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, + NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, /* keep last */ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, -- cgit v1.2.3 From d12e339044a00ecae993b06672c38c168a92f0c3 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 15:08:20 -0400 Subject: drm/msm: add MSM_BO_CACHED_COHERENT Add a new cache mode for creating coherent host-cached BOs. Signed-off-by: Jonathan Marek Reviewed-by: Jordan Crouse Link: https://lore.kernel.org/r/20210423190833.25319-5-jonathan@marek.ca Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 5596d7c37f9e..a92d90a6d96f 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -95,12 +95,11 @@ struct drm_msm_param { #define MSM_BO_CACHED 0x00010000 #define MSM_BO_WC 0x00020000 #define MSM_BO_UNCACHED 0x00040000 +#define MSM_BO_CACHED_COHERENT 0x080000 #define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ MSM_BO_GPU_READONLY | \ - MSM_BO_CACHED | \ - MSM_BO_WC | \ - MSM_BO_UNCACHED) + MSM_BO_CACHE_MASK) struct drm_msm_gem_new { __u64 size; /* in */ -- cgit v1.2.3 From 9ef364432db4a11ff2dbee398d7ed06e93bdfe5e Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 15:08:21 -0400 Subject: drm/msm: deprecate MSM_BO_UNCACHED (map as writecombine instead) There shouldn't be any reason to ever use uncached over writecombine, so just use writecombine for MSM_BO_UNCACHED. Note: userspace never used MSM_BO_UNCACHED anyway Signed-off-by: Jonathan Marek Acked-by: Jordan Crouse Link: https://lore.kernel.org/r/20210423190833.25319-6-jonathan@marek.ca Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index a92d90a6d96f..f075851021c3 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -94,7 +94,7 @@ struct drm_msm_param { /* cache modes */ #define MSM_BO_CACHED 0x00010000 #define MSM_BO_WC 0x00020000 -#define MSM_BO_UNCACHED 0x00040000 +#define MSM_BO_UNCACHED 0x00040000 /* deprecated, use MSM_BO_WC */ #define MSM_BO_CACHED_COHERENT 0x080000 #define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ -- cgit v1.2.3 From 55d444b310c64b084dcc62ba3e4dc3862269fb96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 23 Jun 2021 08:35:29 +0900 Subject: tcp: Add stats for socket migration. This commit adds two stats for the socket migration feature to evaluate the effectiveness: LINUX_MIB_TCPMIGRATEREQ(SUCCESS|FAILURE). If the migration fails because of the own_req race in receiving ACK and sending SYN+ACK paths, we do not increment the failure stat. Then another CPU is responsible for the req. Link: https://lore.kernel.org/bpf/CAK6E8=cgFKuGecTzSCSQ8z3YJ_163C0uwO9yRvfDSE7vOe9mJA@mail.gmail.com/ Suggested-by: Yuchung Cheng Signed-off-by: Kuniyuki Iwashima Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 26fc60ce9298..904909d020e2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -290,6 +290,8 @@ enum LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ + LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ + LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From cb082bfab59a224a49ae803fed52cd03e8d6b5e0 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 18 Jun 2021 22:27:04 +0000 Subject: KVM: stats: Add fd-based API to read binary stats data This commit defines the API for userspace and prepare the common functionalities to support per VM/VCPU binary stats data readings. The KVM stats now is only accessible by debugfs, which has some shortcomings this change series are supposed to fix: 1. The current debugfs stats solution in KVM could be disabled when kernel Lockdown mode is enabled, which is a potential rick for production. 2. The current debugfs stats solution in KVM is organized as "one stats per file", it is good for debugging, but not efficient for production. 3. The stats read/clear in current debugfs solution in KVM are protected by the global kvm_lock. Besides that, there are some other benefits with this change: 1. All KVM VM/VCPU stats can be read out in a bulk by one copy to userspace. 2. A schema is used to describe KVM statistics. From userspace's perspective, the KVM statistics are self-describing. 3. With the fd-based solution, a separate telemetry would be able to read KVM stats in a less privileged environment. 4. After the initial setup by reading in stats descriptors, a telemetry only needs to read the stats data itself, no more parsing or setup is needed. Reviewed-by: David Matlack Reviewed-by: Ricardo Koller Reviewed-by: Krish Sadhukhan Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba #arm64 Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-3-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 330835f1005b..f1ba602260f6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1087,6 +1087,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SREGS2 200 #define KVM_CAP_EXIT_HYPERCALL 201 #define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 #ifdef KVM_CAP_IRQ_ROUTING @@ -1906,4 +1907,76 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. The memory is allocated + * at the send of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointd by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. + */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc. + * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * struture kvm or kvm_vcpu. + * @unused: Unused field for future usage. Always 0 for now. + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From e8b9eab99232c4e62ada9d7976c80fd5e8118289 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Wed, 23 Jun 2021 15:56:45 +0200 Subject: net: retrieve netns cookie via getsocketopt It's getting more common to run nested container environments for testing cloud software. One of such examples is Kind [1] which runs a Kubernetes cluster in Docker containers on a single host. Each container acts as a Kubernetes node, and thus can run any Pod (aka container) inside the former. This approach simplifies testing a lot, as it eliminates complicated VM setups. Unfortunately, such a setup breaks some functionality when cgroupv2 BPF programs are used for load-balancing. The load-balancer BPF program needs to detect whether a request originates from the host netns or a container netns in order to allow some access, e.g. to a service via a loopback IP address. Typically, the programs detect this by comparing netns cookies with the one of the init ns via a call to bpf_get_netns_cookie(NULL). However, in nested environments the latter cannot be used given the Kubernetes node's netns is outside the init ns. To fix this, we need to pass the Kubernetes node netns cookie to the program in a different way: by extending getsockopt() with a SO_NETNS_COOKIE option, the orchestrator which runs in the Kubernetes node netns can retrieve the cookie and pass it to the program instead. Thus, this is following up on Eric's commit 3d368ab87cf6 ("net: initialize net->net_cookie at netns setup") to allow retrieval via SO_NETNS_COOKIE. This is also in line in how we retrieve socket cookie via SO_COOKIE. [1] https://kind.sigs.k8s.io/ Signed-off-by: Lorenz Bauer Signed-off-by: Martynas Pumputis Cc: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 4dcd13d097a9..d588c244ec2f 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -122,6 +122,8 @@ #define SO_PREFER_BUSY_POLL 69 #define SO_BUSY_POLL_BUDGET 70 +#define SO_NETNS_COOKIE 71 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 19238e75bd8ed8ffe784bf5b37586e77b2093742 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 10 May 2021 07:48:33 -0700 Subject: kvm: x86: Allow userspace to handle emulation errors Add a fallback mechanism to the in-kernel instruction emulator that allows userspace the opportunity to process an instruction the emulator was unable to. When the in-kernel instruction emulator fails to process an instruction it will either inject a #UD into the guest or exit to userspace with exit reason KVM_INTERNAL_ERROR. This is because it does not know how to proceed in an appropriate manner. This feature lets userspace get involved to see if it can figure out a better path forward. Signed-off-by: Aaron Lewis Reviewed-by: David Edmondson Message-Id: <20210510144834.658457-2-aaronlewis@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f1ba602260f6..68c9e6d8bbda 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -280,6 +280,9 @@ struct kvm_xen_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* Flags that describe what fields in emulation_failure hold valid data. */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -383,6 +386,25 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -1088,6 +1110,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXIT_HYPERCALL 201 #define KVM_CAP_PPC_RPT_INVALIDATE 202 #define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 808e9df477757955a9644ca323010339be0c40ee Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Fri, 25 Jun 2021 20:36:55 +0300 Subject: userfaultfd: uapi: fix UFFDIO_CONTINUE ioctl request definition This ioctl request reads from uffdio_continue structure written by userspace which justifies _IOC_WRITE flag. It also writes back to that structure which justifies _IOC_READ flag. See NOTEs in include/uapi/asm-generic/ioctl.h for more information. Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") Signed-off-by: Gleb Fotengauer-Malinovskiy Acked-by: Peter Xu Reviewed-by: Axel Rasmussen Reviewed-by: Dmitry V. Levin Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index bafbeb1a2624..650480f41f1d 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -80,8 +80,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) -#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ - struct uffdio_continue) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { -- cgit v1.2.3 From 0ae71c7720e3ae3aabd2e8a072d27f7bd173d25c Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Mon, 17 May 2021 12:39:07 -0700 Subject: seccomp: Support atomic "addfd + send reply" Alban Crequy reported a race condition userspace faces when we want to add some fds and make the syscall return them[1] using seccomp notify. The problem is that currently two different ioctl() calls are needed by the process handling the syscalls (agent) for another userspace process (target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible for the agent to do the first ioctl to add a file descriptor but the target is interrupted (EINTR) before the agent does the second ioctl() call. This patch adds a flag to the ADDFD ioctl() so it adds the fd and returns that value atomically to the target program, as suggested by Kees Cook[2]. This is done by simply allowing seccomp_do_user_notification() to add the fd and return it in this case. Therefore, in this case the target wakes up from the wait in seccomp_do_user_notification() either to interrupt the syscall or to add the fd and return it. This "allocate an fd and return" functionality is useful for syscalls that return a file descriptor only, like connect(2). Other syscalls that return a file descriptor but not as return value (or return more than one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not work with this flag. This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and SECCOMP_IOCTL_NOTIF_SEND into an atomic opteration. The notification's return value, nor error can be set by the user. Upon successful invocation of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND flag, the notifying process's errno will be 0, and the return value will be the file descriptor number that was installed. [1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxta-KQ@mail.gmail.com/ [2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/ Signed-off-by: Rodrigo Campos Signed-off-by: Sargun Dhillon Acked-by: Tycho Andersen Acked-by: Christian Brauner Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me --- include/uapi/linux/seccomp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 6ba18b82a02e..78074254ab98 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -115,6 +115,7 @@ struct seccomp_notif_resp { /* valid flags for seccomp_notif_addfd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ /** * struct seccomp_notif_addfd -- cgit v1.2.3 From 9ba6a1c06279ce499fcf755d8134d679a1f3b4ed Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 24 Jun 2021 15:09:59 +0100 Subject: io_uring: simplify struct io_uring_sqe layout Flatten struct io_uring_sqe, the last union is exactly 64B, so move them out of union { struct { ... }}, and decrease __pad2 size. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2e21ef7aed136293d654450bc3088973a8adc730.1624543113.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f1f9ac114b51..79126d5cd289 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -46,21 +46,17 @@ struct io_uring_sqe { __u32 unlink_flags; }; __u64 user_data; /* data to be passed back at completion time */ + /* pack this to avoid bogus arm OABI complaints */ union { - struct { - /* pack this to avoid bogus arm OABI complaints */ - union { - /* index into fixed buffers, if used */ - __u16 buf_index; - /* for grouped buffer selection */ - __u16 buf_group; - } __attribute__((packed)); - /* personality to use, if used */ - __u16 personality; - __s32 splice_fd_in; - }; - __u64 __pad2[3]; - }; + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } __attribute__((packed)); + /* personality to use, if used */ + __u16 personality; + __s32 splice_fd_in; + __u64 __pad2[2]; }; enum { -- cgit v1.2.3 From 6497ef8df568afbf5f3e38825a4590ff41611a54 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 29 Apr 2021 15:58:28 +0530 Subject: nbd: provide a way for userspace processes to identify device backends Problem: On reconfigure of device, there is no way to defend if the backend storage is matching with the initial backend storage. Say, if an initial connect request for backend "pool1/image1" got mapped to /dev/nbd0 and the userspace process is terminated. A next reconfigure request within NBD_ATTR_DEAD_CONN_TIMEOUT is allowed to use /dev/nbd0 for a different backend "pool1/image2" For example, an operation like below could be dangerous: $ sudo rbd-nbd map --try-netlink rbd-pool/ext4-image /dev/nbd0 $ sudo blkid /dev/nbd0 /dev/nbd0: UUID="bfc444b4-64b1-418f-8b36-6e0d170cfc04" TYPE="ext4" $ sudo pkill -9 rbd-nbd $ sudo rbd-nbd attach --try-netlink --device /dev/nbd0 rbd-pool/xfs-image /dev/nbd0 $ sudo blkid /dev/nbd0 /dev/nbd0: UUID="d29bf343-6570-4069-a9ea-2fa156ced908" TYPE="xfs" Solution: Provide a way for userspace processes to keep some metadata to identify between the device and the backend, so that when a reconfigure request is made, we can compare and avoid such dangerous operations. With this solution, as part of the initial connect request, backend path can be stored in the sysfs per device config, so that on a reconfigure request it's easy to check if the backend path matches with the initial connect backend path. Please note, ioctl interface to nbd will not have these changes, as there won't be any reconfigure. Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Xiubo Li Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210429102828.31248-1-prasanna.kalever@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/nbd-netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index c5d0ef7aa7d5..2d0b90964227 100644 --- a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -35,6 +35,7 @@ enum { NBD_ATTR_SOCKETS, NBD_ATTR_DEAD_CONN_TIMEOUT, NBD_ATTR_DEVICE_LIST, + NBD_ATTR_BACKEND_IDENTIFIER, __NBD_ATTR_MAX, }; #define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) -- cgit v1.2.3 From 964ab0040ff9598783bf37776b5e31b27b50e293 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Wed, 30 Jun 2021 18:49:27 -0700 Subject: userfaultfd/shmem: advertise shmem minor fault support Now that the feature is fully implemented (the faulting path hooks exist so userspace is notified, and the ioctl to resolve such faults is available), advertise this as a supported feature. Link: https://lkml.kernel.org/r/20210503180737.2487560-6-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Hugh Dickins Acked-by: Peter Xu Cc: Alexander Viro Cc: Andrea Arcangeli Cc: Brian Geffon Cc: "Dr . David Alan Gilbert" Cc: Jerome Glisse Cc: Joe Perches Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: Mike Kravetz Cc: Mike Rapoport Cc: Mina Almasry Cc: Oliver Upton Cc: Shaohua Li Cc: Shuah Khan Cc: Stephen Rothwell Cc: Wang Qing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 650480f41f1d..05b31d60acf6 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -31,7 +31,8 @@ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ - UFFD_FEATURE_MINOR_HUGETLBFS) + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -185,6 +186,9 @@ struct uffdio_api { * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults * can be intercepted (via REGISTER_MODE_MINOR) for * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -196,6 +200,7 @@ struct uffdio_api { #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; -- cgit v1.2.3 From 7858d7bca7fbbbbd5b940d2ec371b2d060b21b84 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 Jun 2021 18:51:00 -0700 Subject: mm/mempolicy: don't handle MPOL_LOCAL like a fake MPOL_PREFERRED policy MPOL_LOCAL policy has been setup as a real policy, but it is still handled like a faked POL_PREFERRED policy with one internal MPOL_F_LOCAL flag bit set, and there are many places having to judge the real 'prefer' or the 'local' policy, which are quite confusing. In current code, there are 4 cases that MPOL_LOCAL are used: 1. user specifies 'local' policy 2. user specifies 'prefer' policy, but with empty nodemask 3. system 'default' policy is used 4. 'prefer' policy + valid 'preferred' node with MPOL_F_STATIC_NODES flag set, and when it is 'rebind' to a nodemask which doesn't contains the 'preferred' node, it will perform as 'local' policy So make 'local' a real policy instead of a fake 'prefer' one, and kill MPOL_F_LOCAL bit, which can greatly reduce the confusion for code reading. For case 4, the logic of mpol_rebind_preferred() is confusing, as Michal Hocko pointed out: : I do believe that rebinding preferred policy is just bogus and it should : be dropped altogether on the ground that a preference is a mere hint from : userspace where to start the allocation. Unless I am missing something : cpusets will be always authoritative for the final placement. The : preferred node just acts as a starting point and it should be really : preserved when cpusets changes. Otherwise we have a very subtle behavior : corner cases. So dump all the tricky transformation between 'prefer' and 'local', and just record the new nodemask of rebinding. [feng.tang@intel.com: fix a problem in mpol_set_nodemask(), per Michal Hocko] Link: https://lkml.kernel.org/r/1622560492-1294-3-git-send-email-feng.tang@intel.com [feng.tang@intel.com: refine code and comments of mpol_set_nodemask(), per Michal] Link: https://lkml.kernel.org/r/20210603081807.GE56979@shbuild999.sh.intel.com Link: https://lkml.kernel.org/r/1622469956-82897-3-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Ben Widawsky Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/mempolicy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 4832fd0b5642..19a00bc7fe86 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -60,7 +60,6 @@ enum { * are never OR'ed into the mode in mempolicy API arguments. */ #define MPOL_F_SHARED (1 << 0) /* identify shared policies */ -#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ -- cgit v1.2.3 From 4ca9b3859dac14bbef0c27d00667bb5b10917adb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 30 Jun 2021 18:52:28 -0700 Subject: mm/madvise: introduce MADV_POPULATE_(READ|WRITE) to prefault page tables I. Background: Sparse Memory Mappings When we manage sparse memory mappings dynamically in user space - also sometimes involving MAP_NORESERVE - we want to dynamically populate/ discard memory inside such a sparse memory region. Example users are hypervisors (especially implementing memory ballooning or similar technologies like virtio-mem) and memory allocators. In addition, we want to fail in a nice way (instead of generating SIGBUS) if populating does not succeed because we are out of backend memory (which can happen easily with file-based mappings, especially tmpfs and hugetlbfs). While MADV_DONTNEED, MADV_REMOVE and FALLOC_FL_PUNCH_HOLE allow for reliably discarding memory for most mapping types, there is no generic approach to populate page tables and preallocate memory. Although mmap() supports MAP_POPULATE, it is not applicable to the concept of sparse memory mappings, where we want to populate/discard dynamically and avoid expensive/problematic remappings. In addition, we never actually report errors during the final populate phase - it is best-effort only. fallocate() can be used to preallocate file-based memory and fail in a safe way. However, it cannot really be used for any private mappings on anonymous files via memfd due to COW semantics. In addition, fallocate() does not actually populate page tables, so we still always get pagefaults on first access - which is sometimes undesired (i.e., real-time workloads) and requires real prefaulting of page tables, not just a preallocation of backend storage. There might be interesting use cases for sparse memory regions along with mlockall(MCL_ONFAULT) which fallocate() cannot satisfy as it does not prefault page tables. II. On preallcoation/prefaulting from user space Because we don't have a proper interface, what applications (like QEMU and databases) end up doing is touching (i.e., reading+writing one byte to not overwrite existing data) all individual pages. However, that approach 1) Can result in wear on storage backing, because we end up reading/writing each page; this is especially a problem for dax/pmem. 2) Can result in mmap_sem contention when prefaulting via multiple threads. 3) Requires expensive signal handling, especially to catch SIGBUS in case of hugetlbfs/shmem/file-backed memory. For example, this is problematic in hypervisors like QEMU where SIGBUS handlers might already be used by other subsystems concurrently to e.g, handle hardware errors. "Simply" doing preallocation concurrently from other thread is not that easy. III. On MADV_WILLNEED Extending MADV_WILLNEED is not an option because 1. It would change the semantics: "Expect access in the near future." and "might be a good idea to read some pages" vs. "Definitely populate/ preallocate all memory and definitely fail on errors.". 2. Existing users (like virtio-balloon in QEMU when deflating the balloon) don't want populate/prealloc semantics. They treat this rather as a hint to give a little performance boost without too much overhead - and don't expect that a lot of memory might get consumed or a lot of time might be spent. IV. MADV_POPULATE_READ and MADV_POPULATE_WRITE Let's introduce MADV_POPULATE_READ and MADV_POPULATE_WRITE, inspired by MAP_POPULATE, with the following semantics: 1. MADV_POPULATE_READ can be used to prefault page tables just like manually reading each individual page. This will not break any COW mappings. The shared zero page might get mapped and no backend storage might get preallocated -- allocation might be deferred to write-fault time. Especially shared file mappings require an explicit fallocate() upfront to actually preallocate backend memory (blocks in the file system) in case the file might have holes. 2. If MADV_POPULATE_READ succeeds, all page tables have been populated (prefaulted) readable once. 3. MADV_POPULATE_WRITE can be used to preallocate backend memory and prefault page tables just like manually writing (or reading+writing) each individual page. This will break any COW mappings -- e.g., the shared zeropage is never populated. 4. If MADV_POPULATE_WRITE succeeds, all page tables have been populated (prefaulted) writable once. 5. MADV_POPULATE_READ and MADV_POPULATE_WRITE cannot be applied to special mappings marked with VM_PFNMAP and VM_IO. Also, proper access permissions (e.g., PROT_READ, PROT_WRITE) are required. If any such mapping is encountered, madvise() fails with -EINVAL. 6. If MADV_POPULATE_READ or MADV_POPULATE_WRITE fails, some page tables might have been populated. 7. MADV_POPULATE_READ and MADV_POPULATE_WRITE will return -EHWPOISON when encountering a HW poisoned page in the range. 8. Similar to MAP_POPULATE, MADV_POPULATE_READ and MADV_POPULATE_WRITE cannot protect from the OOM (Out Of Memory) handler killing the process. While the use case for MADV_POPULATE_WRITE is fairly obvious (i.e., preallocate memory and prefault page tables for VMs), one issue is that whenever we prefault pages writable, the pages have to be marked dirty, because the CPU could dirty them any time. while not a real problem for hugetlbfs or dax/pmem, it can be a problem for shared file mappings: each page will be marked dirty and has to be written back later when evicting. MADV_POPULATE_READ allows for optimizing this scenario: Pre-read a whole mapping from backend storage without marking it dirty, such that eviction won't have to write it back. As discussed above, shared file mappings might require an explciit fallocate() upfront to achieve preallcoation+prepopulation. Although sparse memory mappings are the primary use case, this will also be useful for other preallocate/prefault use cases where MAP_POPULATE is not desired or the semantics of MAP_POPULATE are not sufficient: as one example, QEMU users can trigger preallocation/prefaulting of guest RAM after the mapping was created -- and don't want errors to be silently suppressed. Looking at the history, MADV_POPULATE was already proposed in 2013 [1], however, the main motivation back than was performance improvements -- which should also still be the case. V. Single-threaded performance comparison I did a short experiment, prefaulting page tables on completely *empty mappings/files* and repeated the experiment 10 times. The results correspond to the shortest execution time. In general, the performance benefit for huge pages is negligible with small mappings. V.1: Private mappings POPULATE_READ and POPULATE_WRITE is fastest. Note that Reading/POPULATE_READ will populate the shared zeropage where applicable -- which result in short population times. The fastest way to allocate backend storage (here: swap or huge pages) and prefault page tables is POPULATE_WRITE. V.2: Shared mappings fallocate() is fastest, however, doesn't prefault page tables. POPULATE_WRITE is faster than simple writes and read/writes. POPULATE_READ is faster than simple reads. Without a fd, the fastest way to allocate backend storage and prefault page tables is POPULATE_WRITE. With an fd, the fastest way is usually FALLOCATE+POPULATE_READ or FALLOCATE+POPULATE_WRITE respectively; one exception are actual files: FALLOCATE+Read is slightly faster than FALLOCATE+POPULATE_READ. The fastest way to allocate backend storage prefault page tables is FALLOCATE+POPULATE_WRITE -- except when dealing with actual files; then, FALLOCATE+POPULATE_READ is fastest and won't directly mark all pages as dirty. v.3: Detailed results ================================================== 2 MiB MAP_PRIVATE: ************************************************** Anon 4 KiB : Read : 0.119 ms Anon 4 KiB : Write : 0.222 ms Anon 4 KiB : Read/Write : 0.380 ms Anon 4 KiB : POPULATE_READ : 0.060 ms Anon 4 KiB : POPULATE_WRITE : 0.158 ms Memfd 4 KiB : Read : 0.034 ms Memfd 4 KiB : Write : 0.310 ms Memfd 4 KiB : Read/Write : 0.362 ms Memfd 4 KiB : POPULATE_READ : 0.039 ms Memfd 4 KiB : POPULATE_WRITE : 0.229 ms Memfd 2 MiB : Read : 0.030 ms Memfd 2 MiB : Write : 0.030 ms Memfd 2 MiB : Read/Write : 0.030 ms Memfd 2 MiB : POPULATE_READ : 0.030 ms Memfd 2 MiB : POPULATE_WRITE : 0.030 ms tmpfs : Read : 0.033 ms tmpfs : Write : 0.313 ms tmpfs : Read/Write : 0.406 ms tmpfs : POPULATE_READ : 0.039 ms tmpfs : POPULATE_WRITE : 0.285 ms file : Read : 0.033 ms file : Write : 0.351 ms file : Read/Write : 0.408 ms file : POPULATE_READ : 0.039 ms file : POPULATE_WRITE : 0.290 ms hugetlbfs : Read : 0.030 ms hugetlbfs : Write : 0.030 ms hugetlbfs : Read/Write : 0.030 ms hugetlbfs : POPULATE_READ : 0.030 ms hugetlbfs : POPULATE_WRITE : 0.030 ms ************************************************** 4096 MiB MAP_PRIVATE: ************************************************** Anon 4 KiB : Read : 237.940 ms Anon 4 KiB : Write : 708.409 ms Anon 4 KiB : Read/Write : 1054.041 ms Anon 4 KiB : POPULATE_READ : 124.310 ms Anon 4 KiB : POPULATE_WRITE : 572.582 ms Memfd 4 KiB : Read : 136.928 ms Memfd 4 KiB : Write : 963.898 ms Memfd 4 KiB : Read/Write : 1106.561 ms Memfd 4 KiB : POPULATE_READ : 78.450 ms Memfd 4 KiB : POPULATE_WRITE : 805.881 ms Memfd 2 MiB : Read : 357.116 ms Memfd 2 MiB : Write : 357.210 ms Memfd 2 MiB : Read/Write : 357.606 ms Memfd 2 MiB : POPULATE_READ : 356.094 ms Memfd 2 MiB : POPULATE_WRITE : 356.937 ms tmpfs : Read : 137.536 ms tmpfs : Write : 954.362 ms tmpfs : Read/Write : 1105.954 ms tmpfs : POPULATE_READ : 80.289 ms tmpfs : POPULATE_WRITE : 822.826 ms file : Read : 137.874 ms file : Write : 987.025 ms file : Read/Write : 1107.439 ms file : POPULATE_READ : 80.413 ms file : POPULATE_WRITE : 857.622 ms hugetlbfs : Read : 355.607 ms hugetlbfs : Write : 355.729 ms hugetlbfs : Read/Write : 356.127 ms hugetlbfs : POPULATE_READ : 354.585 ms hugetlbfs : POPULATE_WRITE : 355.138 ms ************************************************** 2 MiB MAP_SHARED: ************************************************** Anon 4 KiB : Read : 0.394 ms Anon 4 KiB : Write : 0.348 ms Anon 4 KiB : Read/Write : 0.400 ms Anon 4 KiB : POPULATE_READ : 0.326 ms Anon 4 KiB : POPULATE_WRITE : 0.273 ms Anon 2 MiB : Read : 0.030 ms Anon 2 MiB : Write : 0.030 ms Anon 2 MiB : Read/Write : 0.030 ms Anon 2 MiB : POPULATE_READ : 0.030 ms Anon 2 MiB : POPULATE_WRITE : 0.030 ms Memfd 4 KiB : Read : 0.412 ms Memfd 4 KiB : Write : 0.372 ms Memfd 4 KiB : Read/Write : 0.419 ms Memfd 4 KiB : POPULATE_READ : 0.343 ms Memfd 4 KiB : POPULATE_WRITE : 0.288 ms Memfd 4 KiB : FALLOCATE : 0.137 ms Memfd 4 KiB : FALLOCATE+Read : 0.446 ms Memfd 4 KiB : FALLOCATE+Write : 0.330 ms Memfd 4 KiB : FALLOCATE+Read/Write : 0.454 ms Memfd 4 KiB : FALLOCATE+POPULATE_READ : 0.379 ms Memfd 4 KiB : FALLOCATE+POPULATE_WRITE : 0.268 ms Memfd 2 MiB : Read : 0.030 ms Memfd 2 MiB : Write : 0.030 ms Memfd 2 MiB : Read/Write : 0.030 ms Memfd 2 MiB : POPULATE_READ : 0.030 ms Memfd 2 MiB : POPULATE_WRITE : 0.030 ms Memfd 2 MiB : FALLOCATE : 0.030 ms Memfd 2 MiB : FALLOCATE+Read : 0.031 ms Memfd 2 MiB : FALLOCATE+Write : 0.031 ms Memfd 2 MiB : FALLOCATE+Read/Write : 0.031 ms Memfd 2 MiB : FALLOCATE+POPULATE_READ : 0.030 ms Memfd 2 MiB : FALLOCATE+POPULATE_WRITE : 0.030 ms tmpfs : Read : 0.416 ms tmpfs : Write : 0.369 ms tmpfs : Read/Write : 0.425 ms tmpfs : POPULATE_READ : 0.346 ms tmpfs : POPULATE_WRITE : 0.295 ms tmpfs : FALLOCATE : 0.139 ms tmpfs : FALLOCATE+Read : 0.447 ms tmpfs : FALLOCATE+Write : 0.333 ms tmpfs : FALLOCATE+Read/Write : 0.454 ms tmpfs : FALLOCATE+POPULATE_READ : 0.380 ms tmpfs : FALLOCATE+POPULATE_WRITE : 0.272 ms file : Read : 0.191 ms file : Write : 0.511 ms file : Read/Write : 0.524 ms file : POPULATE_READ : 0.196 ms file : POPULATE_WRITE : 0.434 ms file : FALLOCATE : 0.004 ms file : FALLOCATE+Read : 0.197 ms file : FALLOCATE+Write : 0.554 ms file : FALLOCATE+Read/Write : 0.480 ms file : FALLOCATE+POPULATE_READ : 0.201 ms file : FALLOCATE+POPULATE_WRITE : 0.381 ms hugetlbfs : Read : 0.030 ms hugetlbfs : Write : 0.030 ms hugetlbfs : Read/Write : 0.030 ms hugetlbfs : POPULATE_READ : 0.030 ms hugetlbfs : POPULATE_WRITE : 0.030 ms hugetlbfs : FALLOCATE : 0.030 ms hugetlbfs : FALLOCATE+Read : 0.031 ms hugetlbfs : FALLOCATE+Write : 0.031 ms hugetlbfs : FALLOCATE+Read/Write : 0.030 ms hugetlbfs : FALLOCATE+POPULATE_READ : 0.030 ms hugetlbfs : FALLOCATE+POPULATE_WRITE : 0.030 ms ************************************************** 4096 MiB MAP_SHARED: ************************************************** Anon 4 KiB : Read : 1053.090 ms Anon 4 KiB : Write : 913.642 ms Anon 4 KiB : Read/Write : 1060.350 ms Anon 4 KiB : POPULATE_READ : 893.691 ms Anon 4 KiB : POPULATE_WRITE : 782.885 ms Anon 2 MiB : Read : 358.553 ms Anon 2 MiB : Write : 358.419 ms Anon 2 MiB : Read/Write : 357.992 ms Anon 2 MiB : POPULATE_READ : 357.533 ms Anon 2 MiB : POPULATE_WRITE : 357.808 ms Memfd 4 KiB : Read : 1078.144 ms Memfd 4 KiB : Write : 942.036 ms Memfd 4 KiB : Read/Write : 1100.391 ms Memfd 4 KiB : POPULATE_READ : 925.829 ms Memfd 4 KiB : POPULATE_WRITE : 804.394 ms Memfd 4 KiB : FALLOCATE : 304.632 ms Memfd 4 KiB : FALLOCATE+Read : 1163.359 ms Memfd 4 KiB : FALLOCATE+Write : 933.186 ms Memfd 4 KiB : FALLOCATE+Read/Write : 1187.304 ms Memfd 4 KiB : FALLOCATE+POPULATE_READ : 1013.660 ms Memfd 4 KiB : FALLOCATE+POPULATE_WRITE : 794.560 ms Memfd 2 MiB : Read : 358.131 ms Memfd 2 MiB : Write : 358.099 ms Memfd 2 MiB : Read/Write : 358.250 ms Memfd 2 MiB : POPULATE_READ : 357.563 ms Memfd 2 MiB : POPULATE_WRITE : 357.334 ms Memfd 2 MiB : FALLOCATE : 356.735 ms Memfd 2 MiB : FALLOCATE+Read : 358.152 ms Memfd 2 MiB : FALLOCATE+Write : 358.331 ms Memfd 2 MiB : FALLOCATE+Read/Write : 358.018 ms Memfd 2 MiB : FALLOCATE+POPULATE_READ : 357.286 ms Memfd 2 MiB : FALLOCATE+POPULATE_WRITE : 357.523 ms tmpfs : Read : 1087.265 ms tmpfs : Write : 950.840 ms tmpfs : Read/Write : 1107.567 ms tmpfs : POPULATE_READ : 922.605 ms tmpfs : POPULATE_WRITE : 810.094 ms tmpfs : FALLOCATE : 306.320 ms tmpfs : FALLOCATE+Read : 1169.796 ms tmpfs : FALLOCATE+Write : 933.730 ms tmpfs : FALLOCATE+Read/Write : 1191.610 ms tmpfs : FALLOCATE+POPULATE_READ : 1020.474 ms tmpfs : FALLOCATE+POPULATE_WRITE : 798.945 ms file : Read : 654.101 ms file : Write : 1259.142 ms file : Read/Write : 1289.509 ms file : POPULATE_READ : 661.642 ms file : POPULATE_WRITE : 1106.816 ms file : FALLOCATE : 1.864 ms file : FALLOCATE+Read : 656.328 ms file : FALLOCATE+Write : 1153.300 ms file : FALLOCATE+Read/Write : 1180.613 ms file : FALLOCATE+POPULATE_READ : 668.347 ms file : FALLOCATE+POPULATE_WRITE : 996.143 ms hugetlbfs : Read : 357.245 ms hugetlbfs : Write : 357.413 ms hugetlbfs : Read/Write : 357.120 ms hugetlbfs : POPULATE_READ : 356.321 ms hugetlbfs : POPULATE_WRITE : 356.693 ms hugetlbfs : FALLOCATE : 355.927 ms hugetlbfs : FALLOCATE+Read : 357.074 ms hugetlbfs : FALLOCATE+Write : 357.120 ms hugetlbfs : FALLOCATE+Read/Write : 356.983 ms hugetlbfs : FALLOCATE+POPULATE_READ : 356.413 ms hugetlbfs : FALLOCATE+POPULATE_WRITE : 356.266 ms ************************************************** [1] https://lkml.org/lkml/2013/6/27/698 [akpm@linux-foundation.org: coding style fixes] Link: https://lkml.kernel.org/r/20210419135443.12822-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: Michal Hocko Cc: Oscar Salvador Cc: Matthew Wilcox (Oracle) Cc: Andrea Arcangeli Cc: Minchan Kim Cc: Jann Horn Cc: Jason Gunthorpe Cc: Dave Hansen Cc: Hugh Dickins Cc: Rik van Riel Cc: Michael S. Tsirkin Cc: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Chris Zankel Cc: Max Filippov Cc: Mike Kravetz Cc: Peter Xu Cc: Rolf Eike Beer Cc: Ram Pai Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/mman-common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index f94f65d429be..1567a3294c3d 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit v1.2.3 From d61914ea6adabde9126b0bed64a7a3a42249435e Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 10 May 2021 16:10:14 +0800 Subject: virtio: update virtio id table, add transitional ids This commit updates virtio id table by adding transitional device ids Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20210510081015.4212-2-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 4fe842c3a3a9..70a8057ad4bb 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -57,4 +57,16 @@ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ +/* + * Virtio Transitional IDs + */ + +#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ +#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ +#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ +#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ +#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ +#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ +#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ + #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From 347269c113f10fbe893f11dd3ae5f44aa15d3111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sat, 3 Jul 2021 15:13:02 +0000 Subject: PCI: Fix kernel-doc formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc formatting throughout drivers/pci and related include files. No change to functionality intended. Check for warnings: $ find include drivers/pci -type f -path "*pci*.[ch]" | xargs scripts/kernel-doc -none [bhelgaas: squashed to one commit] Link: https://lore.kernel.org/r/20210509030237.368540-1-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-1-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-2-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-3-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-4-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-5-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pcitest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index c3ab4c826297..f9c1af8d141b 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/** +/* * pcitest.h - PCI test uapi defines * * Copyright (C) 2017 Texas Instruments -- cgit v1.2.3 From 1507f51255c9ff07d75909a84e7c0d7f3c4b2f49 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:08:03 -0700 Subject: mm: introduce memfd_secret system call to create "secret" memory areas Introduce "memfd_secret" system call with the ability to create memory areas visible only in the context of the owning process and not mapped not only to other processes but in the kernel page tables as well. The secretmem feature is off by default and the user must explicitly enable it at the boot time. Once secretmem is enabled, the user will be able to create a file descriptor using the memfd_secret() system call. The memory areas created by mmap() calls from this file descriptor will be unmapped from the kernel direct map and they will be only mapped in the page table of the processes that have access to the file descriptor. Secretmem is designed to provide the following protections: * Enhanced protection (in conjunction with all the other in-kernel attack prevention systems) against ROP attacks. Seceretmem makes "simple" ROP insufficient to perform exfiltration, which increases the required complexity of the attack. Along with other protections like the kernel stack size limit and address space layout randomization which make finding gadgets is really hard, absence of any in-kernel primitive for accessing secret memory means the one gadget ROP attack can't work. Since the only way to access secret memory is to reconstruct the missing mapping entry, the attacker has to recover the physical page and insert a PTE pointing to it in the kernel and then retrieve the contents. That takes at least three gadgets which is a level of difficulty beyond most standard attacks. * Prevent cross-process secret userspace memory exposures. Once the secret memory is allocated, the user can't accidentally pass it into the kernel to be transmitted somewhere. The secreremem pages cannot be accessed via the direct map and they are disallowed in GUP. * Harden against exploited kernel flaws. In order to access secretmem, a kernel-side attack would need to either walk the page tables and create new ones, or spawn a new privileged uiserspace process to perform secrets exfiltration using ptrace. The file descriptor based memory has several advantages over the "traditional" mm interfaces, such as mlock(), mprotect(), madvise(). File descriptor approach allows explicit and controlled sharing of the memory areas, it allows to seal the operations. Besides, file descriptor based memory paves the way for VMMs to remove the secret memory range from the userspace hipervisor process, for instance QEMU. Andy Lutomirski says: "Getting fd-backed memory into a guest will take some possibly major work in the kernel, but getting vma-backed memory into a guest without mapping it in the host user address space seems much, much worse." memfd_secret() is made a dedicated system call rather than an extension to memfd_create() because it's purpose is to allow the user to create more secure memory mappings rather than to simply allow file based access to the memory. Nowadays a new system call cost is negligible while it is way simpler for userspace to deal with a clear-cut system calls than with a multiplexer or an overloaded syscall. Moreover, the initial implementation of memfd_secret() is completely distinct from memfd_create() so there is no much sense in overloading memfd_create() to begin with. If there will be a need for code sharing between these implementation it can be easily achieved without a need to adjust user visible APIs. The secret memory remains accessible in the process context using uaccess primitives, but it is not exposed to the kernel otherwise; secret memory areas are removed from the direct map and functions in the follow_page()/get_user_page() family will refuse to return a page that belongs to the secret memory area. Once there will be a use case that will require exposing secretmem to the kernel it will be an opt-in request in the system call flags so that user would have to decide what data can be exposed to the kernel. Removing of the pages from the direct map may cause its fragmentation on architectures that use large pages to map the physical memory which affects the system performance. However, the original Kconfig text for CONFIG_DIRECT_GBPAGES said that gigabyte pages in the direct map "... can improve the kernel's performance a tiny bit ..." (commit 00d1c5e05736 ("x86: add gbpages switches")) and the recent report [1] showed that "... although 1G mappings are a good default choice, there is no compelling evidence that it must be the only choice". Hence, it is sufficient to have secretmem disabled by default with the ability of a system administrator to enable it at boot time. Pages in the secretmem regions are unevictable and unmovable to avoid accidental exposure of the sensitive data via swap or during page migration. Since the secretmem mappings are locked in memory they cannot exceed RLIMIT_MEMLOCK. Since these mappings are already locked independently from mlock(), an attempt to mlock()/munlock() secretmem range would fail and mlockall()/munlockall() will ignore secretmem mappings. However, unlike mlock()ed memory, secretmem currently behaves more like long-term GUP: secretmem mappings are unmovable mappings directly consumed by user space. With default limits, there is no excessive use of secretmem and it poses no real problem in combination with ZONE_MOVABLE/CMA, but in the future this should be addressed to allow balanced use of large amounts of secretmem along with ZONE_MOVABLE/CMA. A page that was a part of the secret memory area is cleared when it is freed to ensure the data is not exposed to the next user of that page. The following example demonstrates creation of a secret mapping (error handling is omitted): fd = memfd_secret(0); ftruncate(fd, MAP_SIZE); ptr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); [1] https://lore.kernel.org/linux-mm/213b4567-46ce-f116-9cdf-bbd0c884eb3c@linux.intel.com/ [akpm@linux-foundation.org: suppress Kconfig whine] Link: https://lkml.kernel.org/r/20210518072034.31572-5-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Hagen Paul Pfeifer Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: Elena Reshetova Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Mark Rutland Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: David Hildenbrand Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f3956fc11de6..35687dcb1a42 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -97,5 +97,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 +#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From 7bb7f2ac24a028b20fca466b9633847b289b156a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:08:11 -0700 Subject: arch, mm: wire up memfd_secret system call where relevant Wire up memfd_secret system call on architectures that define ARCH_HAS_SET_DIRECT_MAP, namely arm64, risc-v and x86. Link: https://lkml.kernel.org/r/20210518072034.31572-7-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Palmer Dabbelt Acked-by: Arnd Bergmann Acked-by: Catalin Marinas Acked-by: David Hildenbrand Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Elena Reshetova Cc: Hagen Paul Pfeifer Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. Shutemov" Cc: Mark Rutland Cc: Matthew Wilcox Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/unistd.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f211961ce1da..a9d6fcd95f42 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -873,8 +873,13 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#ifdef __ARCH_WANT_MEMFD_SECRET +#define __NR_memfd_secret 447 +__SYSCALL(__NR_memfd_secret, sys_memfd_secret) +#endif + #undef __NR_syscalls -#define __NR_syscalls 447 +#define __NR_syscalls 448 /* * 32 bit systems traditionally used different -- cgit v1.2.3