From 26594678d00f94c62f2e43162bd6d10fd0b74917 Mon Sep 17 00:00:00 2001 From: Leandro Ribeiro Date: Wed, 9 Jun 2021 20:00:38 -0300 Subject: drm/doc: document how userspace should find out CRTC index In this patch we add a section to document what userspace should do to find out the CRTC index. This is important as they may be many places in the documentation that need this, so it's better to just point to this section and avoid repetition. Signed-off-by: Leandro Ribeiro Reviewed-by: Pekka Paalanen Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210609230039.73307-2-leandro.ribeiro@collabora.com --- include/uapi/drm/drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index d043752a74cf..e1f49dd241f7 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -635,8 +635,8 @@ struct drm_gem_open { /** * DRM_CAP_VBLANK_HIGH_CRTC * - * If set to 1, the kernel supports specifying a CRTC index in the high bits of - * &drm_wait_vblank_request.type. + * If set to 1, the kernel supports specifying a :ref:`CRTC index` + * in the high bits of &drm_wait_vblank_request.type. * * Starting kernel version 2.6.39, this capability is always set to 1. */ -- cgit v1.2.3 From 7a7a933edd6c3a6d5d64e08093f2d564104cefcd Mon Sep 17 00:00:00 2001 From: Martin Krastev Date: Wed, 9 Jun 2021 13:23:00 -0400 Subject: drm/vmwgfx: Introduce VMware mks-guest-stats VMware mks-guest-stats mechanism allows the collection of performance stats from guest userland GL contexts, as well as from vmwgfx kernelspace, via a set of sw- defined performance counters. The userspace performance counters are (de)registerd with vmware-vmx-stats hypervisor via new iocts. The vmwgfx kernelspace counters are controlled at build-time via a new config DRM_VMWGFX_MKSSTATS. * Add vmw_mksstat_{add|remove|reset}_ioctl controlling the tracking of mks-guest-stats in guest winsys contexts * Add DRM_VMWGFX_MKSSTATS config to drivers/gpu/drm/vmwgfx/Kconfig controlling the instrumentation of vmwgfx for kernelspace mks-guest-stats counters * Instrument vmwgfx vmw_execbuf_ioctl to collect mks-guest-stats according to DRM_VMWGFX_MKSSTATS Signed-off-by: Martin Krastev Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20210609172307.131929-3-zackr@vmware.com --- include/uapi/drm/vmwgfx_drm.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index 02e917507479..9078775feb51 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -72,6 +72,9 @@ extern "C" { #define DRM_VMW_GB_SURFACE_CREATE_EXT 27 #define DRM_VMW_GB_SURFACE_REF_EXT 28 #define DRM_VMW_MSG 29 +#define DRM_VMW_MKSSTAT_RESET 30 +#define DRM_VMW_MKSSTAT_ADD 31 +#define DRM_VMW_MKSSTAT_REMOVE 32 /*************************************************************************/ /** @@ -1236,6 +1239,44 @@ struct drm_vmw_msg_arg { __u32 receive_len; }; +/** + * struct drm_vmw_mksstat_add_arg + * + * @stat: Pointer to user-space stat-counters array, page-aligned. + * @info: Pointer to user-space counter-infos array, page-aligned. + * @strs: Pointer to user-space stat strings, page-aligned. + * @stat_len: Length in bytes of stat-counters array. + * @info_len: Length in bytes of counter-infos array. + * @strs_len: Length in bytes of the stat strings, terminators included. + * @description: Pointer to instance descriptor string; will be truncated + * to MKS_GUEST_STAT_INSTANCE_DESC_LENGTH chars. + * @id: Output identifier of the produced record; -1 if error. + * + * Argument to the DRM_VMW_MKSSTAT_ADD ioctl. + */ +struct drm_vmw_mksstat_add_arg { + __u64 stat; + __u64 info; + __u64 strs; + __u64 stat_len; + __u64 info_len; + __u64 strs_len; + __u64 description; + __u64 id; +}; + +/** + * struct drm_vmw_mksstat_remove_arg + * + * @id: Identifier of the record being disposed, originally obtained through + * DRM_VMW_MKSSTAT_ADD ioctl. + * + * Argument to the DRM_VMW_MKSSTAT_REMOVE ioctl. + */ +struct drm_vmw_mksstat_remove_arg { + __u64 id; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From ade0e676ecdcdd3428210684c76493722e09a791 Mon Sep 17 00:00:00 2001 From: Leandro Ribeiro Date: Fri, 11 Jun 2021 18:35:16 -0300 Subject: drm/doc: document drm_mode_get_plane Add a small description and document struct fields of drm_mode_get_plane. Signed-off-by: Leandro Ribeiro Reviewed-by: Pekka Paalanen Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20210611213516.77904-2-leandro.ribeiro@collabora.com --- include/uapi/drm/drm_mode.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 9b6722d45f36..98bf130feda5 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -312,16 +312,48 @@ struct drm_mode_set_plane { __u32 src_w; }; +/** + * struct drm_mode_get_plane - Get plane metadata. + * + * Userspace can perform a GETPLANE ioctl to retrieve information about a + * plane. + * + * To retrieve the number of formats supported, set @count_format_types to zero + * and call the ioctl. @count_format_types will be updated with the value. + * + * To retrieve these formats, allocate an array with the memory needed to store + * @count_format_types formats. Point @format_type_ptr to this array and call + * the ioctl again (with @count_format_types still set to the value returned in + * the first ioctl call). + */ struct drm_mode_get_plane { + /** + * @plane_id: Object ID of the plane whose information should be + * retrieved. Set by caller. + */ __u32 plane_id; + /** @crtc_id: Object ID of the current CRTC. */ __u32 crtc_id; + /** @fb_id: Object ID of the current fb. */ __u32 fb_id; + /** + * @possible_crtcs: Bitmask of CRTC's compatible with the plane. CRTC's + * are created and they receive an index, which corresponds to their + * position in the bitmask. Bit N corresponds to + * :ref:`CRTC index` N. + */ __u32 possible_crtcs; + /** @gamma_size: Never used. */ __u32 gamma_size; + /** @count_format_types: Number of formats. */ __u32 count_format_types; + /** + * @format_type_ptr: Pointer to ``__u32`` array of formats that are + * supported by the plane. These formats do not require modifiers. + */ __u64 format_type_ptr; }; -- cgit v1.2.3 From c649432e86ca677d8762c5764a2832509ca8d449 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jun 2021 14:22:21 +0100 Subject: drm/i915: Fix busy ioctl commentary Just tidy one instance of incorrect context parameter name and a stray sentence ending from before reporting was converted to be class based. Signed-off-by: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210611132221.1055650-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c2c7759b7d2e..a1cb4aa035a9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1348,12 +1348,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether -- cgit v1.2.3 From 51f52547df700819db0d0e2b17b677cb209212b4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Jun 2021 14:42:58 -0500 Subject: dma-buf: Document DMA_BUF_IOCTL_SYNC (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new "DMA Buffer ioctls" section to the dma-buf docs and adds documentation for DMA_BUF_IOCTL_SYNC. v2 (Daniel Vetter): - Fix a couple typos - Add commentary about synchronization with other devices - Use item list format for describing flags v3 (Pekka Paalanen): - Clarify stalling requirements. - Be more clear that that DMA_BUF_IOCTL_SYNC with SINC_END has to be called before more GPU work happens. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Acked-by: Christian König Acked-by: Pekka Paalanen Cc: Sumit Semwal Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210617194258.579011-1-jason@jlekstrand.net --- include/uapi/linux/dma-buf.h | 50 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 7f30393b92c3..8e4a2ca0bcbf 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -22,8 +22,56 @@ #include -/* begin/end dma-buf functions used for userspace mmap. */ +/** + * struct dma_buf_sync - Synchronize with CPU access. + * + * When a DMA buffer is accessed from the CPU via mmap, it is not always + * possible to guarantee coherency between the CPU-visible map and underlying + * memory. To manage coherency, DMA_BUF_IOCTL_SYNC must be used to bracket + * any CPU access to give the kernel the chance to shuffle memory around if + * needed. + * + * Prior to accessing the map, the client must call DMA_BUF_IOCTL_SYNC + * with DMA_BUF_SYNC_START and the appropriate read/write flags. Once the + * access is complete, the client should call DMA_BUF_IOCTL_SYNC with + * DMA_BUF_SYNC_END and the same read/write flags. + * + * The synchronization provided via DMA_BUF_IOCTL_SYNC only provides cache + * coherency. It does not prevent other processes or devices from + * accessing the memory at the same time. If synchronization with a GPU or + * other device driver is required, it is the client's responsibility to + * wait for buffer to be ready for reading or writing before calling this + * ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure that + * follow-up work is not submitted to GPU or other device driver until + * after this ioctl has been called with DMA_BUF_SYNC_END? + * + * If the driver or API with which the client is interacting uses implicit + * synchronization, waiting for prior work to complete can be done via + * poll() on the DMA buffer file descriptor. If the driver or API requires + * explicit synchronization, the client may have to wait on a sync_file or + * other synchronization primitive outside the scope of the DMA buffer API. + */ struct dma_buf_sync { + /** + * @flags: Set of access flags + * + * DMA_BUF_SYNC_START: + * Indicates the start of a map access session. + * + * DMA_BUF_SYNC_END: + * Indicates the end of a map access session. + * + * DMA_BUF_SYNC_READ: + * Indicates that the mapped DMA buffer will be read by the + * client via the CPU map. + * + * DMA_BUF_SYNC_WRITE: + * Indicates that the mapped DMA buffer will be written by the + * client via the CPU map. + * + * DMA_BUF_SYNC_RW: + * An alias for DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE. + */ __u64 flags; }; -- cgit v1.2.3 From 577729533cdc4e37a8c230e404a44ad7a3ff4eda Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Jun 2021 16:00:36 +0100 Subject: drm/i915: Document the Virtual Engine uAPI A little bit of documentation covering the topics of engine discovery, context engine maps and virtual engines. It is not very detailed but supposed to be a starting point of giving a brief high level overview of general principles and intended use cases. v2: * Have the text in uapi header and link from there. v4: * Link from driver-uapi.rst. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210618150036.2507653-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 188 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a1cb4aa035a9..2f70c48567c0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1806,6 +1806,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1882,6 +1945,61 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ @@ -2375,6 +2493,76 @@ struct drm_i915_query_topology_info { __u8 data[]; }; +/** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straight away enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + /** * struct drm_i915_engine_info * -- cgit v1.2.3 From 47316f4a305367794fc04f23e5c778678d8f1d8e Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:55 +0000 Subject: bpf: Support input xdp_md context in BPF_PROG_TEST_RUN Support passing a xdp_md via ctx_in/ctx_out in bpf_attr for BPF_PROG_TEST_RUN. The intended use case is to pass some XDP meta data to the test runs of XDP programs that are used as tail calls. For programs that use bpf_prog_test_run_xdp, support xdp_md input and output. Unlike with an actual xdp_md during a non-test run, data_meta must be 0 because it must point to the start of the provided user data. From the initial xdp_md, use data and data_end to adjust the pointers in the generated xdp_buff. All other non-zero fields are prohibited (with EINVAL). If the user has set ctx_out/ctx_size_out, copy the (potentially different) xdp_md back to the userspace. We require all fields of input xdp_md except the ones we explicitly support to be set to zero. The expectation is that in the future we might add support for more fields and we want to fail explicitly if the user runs the program on the kernel where we don't yet support them. Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-3-zeffron@riotgames.com --- include/uapi/linux/bpf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf9252c7381e..b46a383e8db7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * -- cgit v1.2.3 From fe4751c3d513ff4f5422dbf55a966abafe39255e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:06 -0500 Subject: drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE This reverts commit 88be76cdafc7 ("drm/i915: Allow userspace to specify ringsize on construction"). This API was originally added for OpenCL but the compute-runtime PR has sat open for a year without action so we can still pull it out if we want. I argue we should drop it for three reasons: 1. If the compute-runtime PR has sat open for a year, this clearly isn't that important. 2. It's a very leaky API. Ring size is an implementation detail of the current execlist scheduler and really only makes sense there. It can't apply to the older ring-buffer scheduler on pre-execlist hardware because that's shared across all contexts and it won't apply to the GuC scheduler that's in the pipeline. 3. Having userspace set a ring size in bytes is a bad solution to the problem of having too small a ring. There is no way that userspace has the information to know how to properly set the ring size so it's just going to detect the feature and always set it to the maximum of 512K. This is what the compute-runtime PR does. The scheduler in i915, on the other hand, does have the information to make an informed choice. It could detect if the ring size is a problem and grow it itself. Or, if that's too hard, we could just increase the default size from 16K to 32K or even 64K instead of relying on userspace to do it. Let's drop this API for now and, if someone decides they really care about solving this problem, they can do it properly. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-2-jason@jlekstrand.net --- include/uapi/drm/i915_drm.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2f70c48567c0..f229c0abcbb5 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1722,24 +1722,8 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ -- cgit v1.2.3 From 6ff6d61dd2a943bd0c80bb77eb5630e8aa0cac15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:08 -0500 Subject: drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP The idea behind this param is to support OpenCL drivers with relocations because OpenCL reserves 0x0 for NULL and, if we placed memory there, it would confuse CL kernels. It was originally sent out as part of a patch series including libdrm [1] and Beignet [2] support. However, the libdrm and Beignet patches never landed in their respective upstream projects so this API has never been used. It's never been used in Mesa or any other driver, either. Dropping this API allows us to delete a small bit of code. [1]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067030.html [2]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067031.html Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-4-jason@jlekstrand.net --- include/uapi/drm/i915_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f229c0abcbb5..79dcafaf476e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1638,6 +1638,10 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 -- cgit v1.2.3 From 4a766ae40ec8330103a27922b5aa978fdf8bc005 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:11 -0500 Subject: drm/i915: Drop the CONTEXT_CLONE API (v2) This API allows one context to grab bits out of another context upon creation. It can be used as a short-cut for setparam(getparam()) for things like I915_CONTEXT_PARAM_VM. However, it's never been used by any real userspace. It's used by a few IGT tests and that's it. Since it doesn't add any real value (most of the stuff you can CLONE you can copy in other ways), drop it. There is one thing that this API allows you to clone which you cannot clone via getparam/setparam: timelines. However, timelines are an implementation detail of i915 and not really something that needs to be exposed to userspace. Also, sharing timelines between contexts isn't obviously useful and supporting it has the potential to complicate i915 internally. It also doesn't add any functionality that the client can't get in other ways. If a client really wants a shared timeline, they can use a syncobj and set it as an in and out fence on every submit. v2 (Jason Ekstrand): - More detailed commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-7-jason@jlekstrand.net --- include/uapi/drm/i915_drm.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 79dcafaf476e..e334a8b14ef2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2006,20 +2006,10 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; -struct drm_i915_gem_context_create_ext_clone { +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this extension number. + */ #define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; -}; struct drm_i915_gem_context_destroy { __u32 ctx_id; -- cgit v1.2.3 From caa7302b3a346d9a9ec85568cff52d2cee5f32c4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 18 Jun 2021 16:21:36 +0200 Subject: media: include/uapi/linux/cec.h: typo: SATERDAY -> SATURDAY Fix typo in a define: CEC_OP_REC_SEQ_SATERDAY -> CEC_OP_REC_SEQ_SATURDAY This isn't used yet in actual applications to the best of my knowledge, and it certainly doesn't break the ABI since the value doesn't change. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index dc8879d179fd..de936f5e446d 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -642,7 +642,7 @@ struct cec_event { #define CEC_OP_REC_SEQ_WEDNESDAY 0x08 #define CEC_OP_REC_SEQ_THURSDAY 0x10 #define CEC_OP_REC_SEQ_FRIDAY 0x20 -#define CEC_OP_REC_SEQ_SATERDAY 0x40 +#define CEC_OP_REC_SEQ_SATURDAY 0x40 #define CEC_OP_REC_SEQ_ONCE_ONLY 0x00 #define CEC_MSG_CLEAR_DIGITAL_TIMER 0x99 -- cgit v1.2.3 From b48c7236b13cb5ef1b5fdf744aa8841df0f7b43a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 29 Jun 2021 15:11:44 -0500 Subject: exit/bdflush: Remove the deprecated bdflush system call The bdflush system call has been deprecated for a very long time. Recently Michael Schmitz tested[1] and found that the last known caller of of the bdflush system call is unaffected by it's removal. Since the code is not needed delete it. [1] https://lkml.kernel.org/r/36123b5d-daa0-6c2b-f2d4-a942f069fd54@gmail.com Link: https://lkml.kernel.org/r/87sg10quue.fsf_-_@disp2133 Tested-by: Michael Schmitz Acked-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Acked-by: Cyril Hrubis Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/capability.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 2ddb4226cd23..463d1ba2232a 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -243,7 +243,6 @@ struct vfs_ns_cap_data { /* Allow examination and configuration of disk quotas */ /* Allow setting the domainname */ /* Allow setting the hostname */ -/* Allow calling bdflush() */ /* Allow mount() and umount(), setting up new smb connection */ /* Allow some autofs root ioctls */ /* Allow nfsservctl */ -- cgit v1.2.3 From 289f5a72009b8f67334c9f911f7f5fe6e8a80049 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Jul 2021 14:53:07 +0100 Subject: drm/i915/uapi: convert drm_i915_gem_caching to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all the drm_i915_gem_caching bits to proper kernel doc. Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210705135310.1502437-2-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 65 +++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e334a8b14ef2..9078ded93558 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1363,43 +1363,50 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED - * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. - */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. - */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Cacheing level to apply or return value + * @caching: The GTT caching level to apply or possible return value. * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * The supported @caching values: + * + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; -- cgit v1.2.3 From 3aa8c57fe25a9247e25977f1c2302395cbbd8242 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Jul 2021 14:53:09 +0100 Subject: drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all the drm_i915_gem_set_domain bits to proper kernel doc. Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210705135310.1502437-4-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 9078ded93558..e54f9efaead0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -880,14 +880,38 @@ struct drm_i915_gem_mmap_offset { __u64 extensions; }; +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; -- cgit v1.2.3 From f170acda7ffaf0473d06e1e17c12cd9fd63904f5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Jul 2021 21:43:17 +0900 Subject: bpf: Fix a typo of reuseport map in bpf.h. Fix s/BPF_MAP_TYPE_REUSEPORT_ARRAY/BPF_MAP_TYPE_REUSEPORT_SOCKARRAY/ typo in bpf.h. Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210714124317.67526-1-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b46a383e8db7..bafb6282032b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3246,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return -- cgit v1.2.3 From b00628b1c7d595ae5b544e059c27b1f5828314b4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 14 Jul 2021 17:54:09 -0700 Subject: bpf: Introduce bpf timers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce 'struct bpf_timer { __u64 :64; __u64 :64; };' that can be embedded in hash/array/lru maps as a regular field and helpers to operate on it: // Initialize the timer. // First 4 bits of 'flags' specify clockid. // Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, int flags); // Configure the timer to call 'callback_fn' static function. long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); // Arm the timer to expire 'nsec' nanoseconds from the current time. long bpf_timer_start(struct bpf_timer *timer, u64 nsec, u64 flags); // Cancel the timer and wait for callback_fn to finish if it was running. long bpf_timer_cancel(struct bpf_timer *timer); Here is how BPF program might look like: struct map_elem { int counter; struct bpf_timer timer; }; struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1000); __type(key, int); __type(value, struct map_elem); } hmap SEC(".maps"); static int timer_cb(void *map, int *key, struct map_elem *val); /* val points to particular map element that contains bpf_timer. */ SEC("fentry/bpf_fentry_test1") int BPF_PROG(test1, int a) { struct map_elem *val; int key = 0; val = bpf_map_lookup_elem(&hmap, &key); if (val) { bpf_timer_init(&val->timer, &hmap, CLOCK_REALTIME); bpf_timer_set_callback(&val->timer, timer_cb); bpf_timer_start(&val->timer, 1000 /* call timer_cb2 in 1 usec */, 0); } } This patch adds helper implementations that rely on hrtimers to call bpf functions as timers expire. The following patches add necessary safety checks. Only programs with CAP_BPF are allowed to use bpf_timer. The amount of timers used by the program is constrained by the memcg recorded at map creation time. The bpf_timer_init() helper needs explicit 'map' argument because inner maps are dynamic and not known at load time. While the bpf_timer_set_callback() is receiving hidden 'aux->prog' argument supplied by the verifier. The prog pointer is needed to do refcnting of bpf program to make sure that program doesn't get freed while the timer is armed. This approach relies on "user refcnt" scheme used in prog_array that stores bpf programs for bpf_tail_call. The bpf_timer_set_callback() will increment the prog refcnt which is paired with bpf_timer_cancel() that will drop the prog refcnt. The ops->map_release_uref is responsible for cancelling the timers and dropping prog refcnt when user space reference to a map reaches zero. This uref approach is done to make sure that Ctrl-C of user space process will not leave timers running forever unless the user space explicitly pinned a map that contained timers in bpffs. bpf_timer_init() and bpf_timer_set_callback() will return -EPERM if map doesn't have user references (is not held by open file descriptor from user space and not pinned in bpffs). The bpf_map_delete_elem() and bpf_map_update_elem() operations cancel and free the timer if given map element had it allocated. "bpftool map update" command can be used to cancel timers. The 'struct bpf_timer' is explicitly __attribute__((aligned(8))) because '__u64 :64' has 1 byte alignment of 8 byte padding. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210715005417.78572-4-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bafb6282032b..3544ec5234f0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4777,6 +4777,70 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4948,6 +5012,10 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6074,6 +6142,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. -- cgit v1.2.3 From 9b99edcae5c80c8fb9f8e7149bae528c9e610a72 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Jul 2021 11:43:55 +0200 Subject: bpf: Add bpf_get_func_ip helper for tracing programs Adding bpf_get_func_ip helper for BPF_PROG_TYPE_TRACING programs, specifically for all trampoline attach types. The trampoline's caller IP address is stored in (ctx - 8) address. so there's no reason to actually call the helper, but rather fixup the call instruction and return [ctx - 8] value directly. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210714094400.396467-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3544ec5234f0..89688f16ad60 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4841,6 +4841,12 @@ union bpf_attr { * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5016,6 +5022,7 @@ union bpf_attr { FN(timer_set_callback), \ FN(timer_start), \ FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 9ffd9f3ff7193933dae171740ab70a103d460065 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Jul 2021 11:43:56 +0200 Subject: bpf: Add bpf_get_func_ip helper for kprobe programs Adding bpf_get_func_ip helper for BPF_PROG_TYPE_KPROBE programs, so it's now possible to call bpf_get_func_ip from both kprobe and kretprobe programs. Taking the caller's address from 'struct kprobe::addr', which is defined for both kprobe and kretprobe. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/bpf/20210714094400.396467-5-jolsa@kernel.org --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 89688f16ad60..2db6925e04f4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4844,7 +4844,7 @@ union bpf_attr { * * u64 bpf_get_func_ip(void *ctx) * Description - * Get address of the traced function (for tracing programs). + * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. */ -- cgit v1.2.3 From b83d23a2a38b1770da0491257ae81d52307f7816 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Thu, 15 Jul 2021 08:27:54 -0400 Subject: openvswitch: Introduce per-cpu upcall dispatch The Open vSwitch kernel module uses the upcall mechanism to send packets from kernel space to user space when it misses in the kernel space flow table. The upcall sends packets via a Netlink socket. Currently, a Netlink socket is created for every vport. In this way, there is a 1:1 mapping between a vport and a Netlink socket. When a packet is received by a vport, if it needs to be sent to user space, it is sent via the corresponding Netlink socket. This mechanism, with various iterations of the corresponding user space code, has seen some limitations and issues: * On systems with a large number of vports, there is a correspondingly large number of Netlink sockets which can limit scaling. (https://bugzilla.redhat.com/show_bug.cgi?id=1526306) * Packet reordering on upcalls. (https://bugzilla.redhat.com/show_bug.cgi?id=1844576) * A thundering herd issue. (https://bugzilla.redhat.com/show_bug.cgi?id=1834444) This patch introduces an alternative, feature-negotiated, upcall mode using a per-cpu dispatch rather than a per-vport dispatch. In this mode, the Netlink socket to be used for the upcall is selected based on the CPU of the thread that is executing the upcall. In this way, it resolves the issues above as: a) The number of Netlink sockets scales with the number of CPUs rather than the number of vports. b) Ordering per-flow is maintained as packets are distributed to CPUs based on mechanisms such as RSS and flows are distributed to a single user space thread. c) Packets from a flow can only wake up one user space thread. The corresponding user space code can be found at: https://mail.openvswitch.org/pipermail/ovs-dev/2021-July/385139.html Bugzilla: https://bugzilla.redhat.com/1844576 Signed-off-by: Mark Gray Acked-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8d16744edc31..6571b57b2268 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,6 +70,8 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the @@ -87,6 +89,9 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu + * dispatch mode + */ __OVS_DP_ATTR_MAX }; @@ -127,6 +132,9 @@ struct ovs_vport_stats { /* Allow tc offload recirc sharing */ #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) +/* Allow per-cpu dispatch of upcalls */ +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) -- cgit v1.2.3 From e7737b67ab46ee0eeaa0ca1958f72d86f8d8ccf6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:33 +0100 Subject: drm/i915/uapi: reject caching ioctls for discrete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a noop on DG1, and in the future when need to support other devices which let us control the coherency, then it should be an immutable creation time property for the BO. This will likely be controlled through a new gem_create_ext extension. v2: add some kernel doc for the discrete changes, and document the implicit rules Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-2-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e54f9efaead0..868c2ee7be60 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1395,6 +1395,35 @@ struct drm_i915_gem_busy { * ppGTT support, or if the object is used for scanout). Note that this might * require unbinding the object from the GTT first, if its current caching value * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ struct drm_i915_gem_caching { /** -- cgit v1.2.3 From aef7b67a79564f6cff488aff7f4b89438ca80b23 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:34 +0100 Subject: drm/i915/uapi: convert drm_i915_gem_userptr to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing kernel-doc. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-3-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 868c2ee7be60..e20eeeca7a1c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2141,14 +2141,52 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ -- cgit v1.2.3 From f4b7002a7076f025dce59647a77c8251175d2b34 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:28 +0300 Subject: net: bridge: add vlan mcast snooping knob Add a global knob that controls if vlan multicast snooping is enabled. The proper contexts (vlan or bridge-wide) will be chosen based on the knob when processing packets and changing bridge device state. Note that vlans have their individual mcast snooping enabled by default, but this knob is needed to turn on bridge vlan snooping. It is disabled by default. To enable the knob vlan filtering must also be enabled, it doesn't make sense to have vlan mcast snooping without vlan filtering since that would lead to inconsistencies. Disabling vlan filtering will also automatically disable vlan mcast snooping. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6b56a7549531..7927ad80ee86 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -720,12 +720,14 @@ struct br_mcast_stats { /* bridge boolean options * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, + BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From 1e9ca45662d6bb65fb60d3fbb7737b081d9cffc9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:33 +0300 Subject: net: bridge: multicast: include router port vlan id in notifications Use the port multicast context to check if the router port is a vlan and in case it is include its vlan id in the notification. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 7927ad80ee86..90ac9e11c15b 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -629,6 +629,7 @@ enum { MDBA_ROUTER_PATTR_TYPE, MDBA_ROUTER_PATTR_INET_TIMER, MDBA_ROUTER_PATTR_INET6_TIMER, + MDBA_ROUTER_PATTR_VID, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) -- cgit v1.2.3 From 47ecd2dbd8ec43125ea75d7d2e73c888cda8663f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:34 +0300 Subject: net: bridge: vlan: add support for global options We can have two types of vlan options depending on context: - per-device vlan options (split in per-bridge and per-port) - global vlan options The second type wasn't supported in the bridge until now, but we need them for per-vlan multicast support, per-vlan STP support and other options which require global vlan context. They are contained in the global bridge vlan context even if the vlan is not configured on the bridge device itself. This patch adds initial netlink attributes and support for setting these global vlan options, they can only be set (RTM_NEWVLAN) and the operation must use the bridge device. Since there are no such options yet it shouldn't have any functional effect. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 90ac9e11c15b..4ed57d1a5d89 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -485,10 +485,15 @@ enum { * [BRIDGE_VLANDB_ENTRY_INFO] * ... * } + * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = { + * [BRIDGE_VLANDB_GOPTS_ID] + * ... + * } */ enum { BRIDGE_VLANDB_UNSPEC, BRIDGE_VLANDB_ENTRY, + BRIDGE_VLANDB_GLOBAL_OPTIONS, __BRIDGE_VLANDB_MAX, }; #define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1) @@ -538,6 +543,14 @@ enum { }; #define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) +enum { + BRIDGE_VLANDB_GOPTS_UNSPEC, + BRIDGE_VLANDB_GOPTS_ID, + BRIDGE_VLANDB_GOPTS_RANGE, + __BRIDGE_VLANDB_GOPTS_MAX +}; +#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { -- cgit v1.2.3 From 743a53d9636aad83da63a8638e8365e817ef6365 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:35 +0300 Subject: net: bridge: vlan: add support for dumping global vlan options Add a new vlan options dump flag which causes only global vlan options to be dumped. The dumps are done only with bridge devices, ports are ignored. They support vlan compression if the options in sequential vlans are equal (currently always true). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 4ed57d1a5d89..946ccf33dc53 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -479,6 +479,7 @@ enum { /* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ #define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ +#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */ /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { -- cgit v1.2.3 From 9dee572c384846f4ece029ab5688faed0682e48a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:37 +0300 Subject: net: bridge: vlan: add mcast snooping control Add a new global vlan option which controls whether multicast snooping is enabled or disabled for a single vlan. It controls the vlan private flag: BR_VLFLAG_GLOBAL_MCAST_ENABLED. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 946ccf33dc53..5aca85874447 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -548,6 +548,7 @@ enum { BRIDGE_VLANDB_GOPTS_UNSPEC, BRIDGE_VLANDB_GOPTS_ID, BRIDGE_VLANDB_GOPTS_RANGE, + BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 11656f593a869a4345e3421037614d2b75ae2ad3 Mon Sep 17 00:00:00 2001 From: Lior Nahmanson Date: Mon, 21 Jun 2021 10:06:16 +0300 Subject: RDMA/mlx5: Add DCS offload support DCS is an offload to SW load balancing of DC initiator work requests. A single DCI can be connected to only one target at the time and can't start new connection until the previous work request is completed. This limitation will cause to delay when the initiator process needs to transfer data to multiple targets at the same time. The SW solution is to use a process that handling and spreading the work request on many DCIs according to destinations. This feature is an offload to this process and coming to reduce the load from the CPU and improve the performance. Link: https://lore.kernel.org/r/491c2c2afdb5b07de7f03eab3f93cf0704549dbc.1624258894.git.leonro@nvidia.com Reviewed-by: Meir Lichtinger Signed-off-by: Lior Nahmanson Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5-abi.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 8597e6f22a1c..86be4a92b67b 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -50,6 +50,7 @@ enum { MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8, MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9, MLX5_QP_FLAG_UAR_PAGE_INDEX = 1 << 10, + MLX5_QP_FLAG_DCI_STREAM = 1 << 11, }; enum { @@ -238,6 +239,11 @@ struct mlx5_ib_striding_rq_caps { __u32 reserved; }; +struct mlx5_ib_dci_streams_caps { + __u8 max_log_num_concurent; + __u8 max_log_num_errored; +}; + enum mlx5_ib_query_dev_resp_flags { /* Support 128B CQE compression */ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, @@ -266,7 +272,8 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; __u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */ - __u32 reserved; + struct mlx5_ib_dci_streams_caps dci_streams_caps; + __u16 reserved; }; enum mlx5_ib_create_cq_flags { @@ -313,6 +320,11 @@ struct mlx5_ib_create_srq_resp { __u32 reserved; }; +struct mlx5_ib_create_qp_dci_streams { + __u8 log_num_concurent; + __u8 log_num_errored; +}; + struct mlx5_ib_create_qp { __aligned_u64 buf_addr; __aligned_u64 db_addr; @@ -327,7 +339,8 @@ struct mlx5_ib_create_qp { __aligned_u64 access_key; }; __u32 ece_options; - __u32 reserved; + struct mlx5_ib_create_qp_dci_streams dci_streams; + __u16 reserved; }; /* RX Hash function flags */ -- cgit v1.2.3 From 26a4dc29b74a137f45665089f6d3d633fcc9b662 Mon Sep 17 00:00:00 2001 From: "Juan A. Suarez Romero" Date: Tue, 8 Jun 2021 13:15:41 +0200 Subject: drm/v3d: Expose performance counters to userspace The V3D engine has several hardware performance counters that can of interest for userspace performance analysis tools. This exposes new ioctls to create and destroy performance monitor objects, as well as to query the counter values. Each created performance monitor object has an ID that can be attached to CL/CSD submissions, so the driver enables the requested counters when the job is submitted, and updates the performance monitor values when the job is done. It is up to the user to ensure all the jobs have been finished before getting the performance monitor values. It is also up to the user to properly synchronize BCL jobs when submitting jobs with different performance monitors attached. Cc: Daniel Vetter Cc: David Airlie Cc: Emma Anholt To: dri-devel@lists.freedesktop.org Signed-off-by: Juan A. Suarez Romero Acked-by: Melissa Wen Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/20210608111541.461991-1-jasuarez@igalia.com --- include/uapi/drm/v3d_drm.h | 136 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 1ce746e228d9..4104f22fb3d3 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -38,6 +38,9 @@ extern "C" { #define DRM_V3D_GET_BO_OFFSET 0x05 #define DRM_V3D_SUBMIT_TFU 0x06 #define DRM_V3D_SUBMIT_CSD 0x07 +#define DRM_V3D_PERFMON_CREATE 0x08 +#define DRM_V3D_PERFMON_DESTROY 0x09 +#define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -47,6 +50,12 @@ extern "C" { #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) +#define DRM_IOCTL_V3D_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \ + struct drm_v3d_perfmon_create) +#define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \ + struct drm_v3d_perfmon_destroy) +#define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ + struct drm_v3d_perfmon_get_values) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 @@ -127,6 +136,11 @@ struct drm_v3d_submit_cl { __u32 bo_handle_count; __u32 flags; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmon_id; + + __u32 pad; }; /** @@ -195,6 +209,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_TFU, DRM_V3D_PARAM_SUPPORTS_CSD, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, + DRM_V3D_PARAM_SUPPORTS_PERFMON, }; struct drm_v3d_get_param { @@ -258,6 +273,127 @@ struct drm_v3d_submit_csd { __u32 in_sync; /* Sync object to signal when the CSD job is done. */ __u32 out_sync; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmon_id; +}; + +enum { + V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, + V3D_PERFCNT_FEP_VALID_PRIMS, + V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS, + V3D_PERFCNT_FEP_VALID_QUADS, + V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL, + V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL, + V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS, + V3D_PERFCNT_TLB_QUADS_ZERO_COV, + V3D_PERFCNT_TLB_QUADS_NONZERO_COV, + V3D_PERFCNT_TLB_QUADS_WRITTEN, + V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD, + V3D_PERFCNT_PTB_PRIM_CLIP, + V3D_PERFCNT_PTB_PRIM_REV, + V3D_PERFCNT_QPU_IDLE_CYCLES, + V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER, + V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG, + V3D_PERFCNT_QPU_CYCLES_VALID_INSTR, + V3D_PERFCNT_QPU_CYCLES_TMU_STALL, + V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL, + V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL, + V3D_PERFCNT_QPU_IC_HIT, + V3D_PERFCNT_QPU_IC_MISS, + V3D_PERFCNT_QPU_UC_HIT, + V3D_PERFCNT_QPU_UC_MISS, + V3D_PERFCNT_TMU_TCACHE_ACCESS, + V3D_PERFCNT_TMU_TCACHE_MISS, + V3D_PERFCNT_VPM_VDW_STALL, + V3D_PERFCNT_VPM_VCD_STALL, + V3D_PERFCNT_BIN_ACTIVE, + V3D_PERFCNT_RDR_ACTIVE, + V3D_PERFCNT_L2T_HITS, + V3D_PERFCNT_L2T_MISSES, + V3D_PERFCNT_CYCLE_COUNT, + V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER, + V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT, + V3D_PERFCNT_PTB_PRIMS_BINNED, + V3D_PERFCNT_AXI_WRITES_WATCH_0, + V3D_PERFCNT_AXI_READS_WATCH_0, + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0, + V3D_PERFCNT_AXI_READ_STALLS_WATCH_0, + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0, + V3D_PERFCNT_AXI_READ_BYTES_WATCH_0, + V3D_PERFCNT_AXI_WRITES_WATCH_1, + V3D_PERFCNT_AXI_READS_WATCH_1, + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1, + V3D_PERFCNT_AXI_READ_STALLS_WATCH_1, + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1, + V3D_PERFCNT_AXI_READ_BYTES_WATCH_1, + V3D_PERFCNT_TLB_PARTIAL_QUADS, + V3D_PERFCNT_TMU_CONFIG_ACCESSES, + V3D_PERFCNT_L2T_NO_ID_STALL, + V3D_PERFCNT_L2T_COM_QUE_STALL, + V3D_PERFCNT_L2T_TMU_WRITES, + V3D_PERFCNT_TMU_ACTIVE_CYCLES, + V3D_PERFCNT_TMU_STALLED_CYCLES, + V3D_PERFCNT_CLE_ACTIVE, + V3D_PERFCNT_L2T_TMU_READS, + V3D_PERFCNT_L2T_CLE_READS, + V3D_PERFCNT_L2T_VCD_READS, + V3D_PERFCNT_L2T_TMUCFG_READS, + V3D_PERFCNT_L2T_SLC0_READS, + V3D_PERFCNT_L2T_SLC1_READS, + V3D_PERFCNT_L2T_SLC2_READS, + V3D_PERFCNT_L2T_TMU_W_MISSES, + V3D_PERFCNT_L2T_TMU_R_MISSES, + V3D_PERFCNT_L2T_CLE_MISSES, + V3D_PERFCNT_L2T_VCD_MISSES, + V3D_PERFCNT_L2T_TMUCFG_MISSES, + V3D_PERFCNT_L2T_SLC0_MISSES, + V3D_PERFCNT_L2T_SLC1_MISSES, + V3D_PERFCNT_L2T_SLC2_MISSES, + V3D_PERFCNT_CORE_MEM_WRITES, + V3D_PERFCNT_L2T_MEM_WRITES, + V3D_PERFCNT_PTB_MEM_WRITES, + V3D_PERFCNT_TLB_MEM_WRITES, + V3D_PERFCNT_CORE_MEM_READS, + V3D_PERFCNT_L2T_MEM_READS, + V3D_PERFCNT_PTB_MEM_READS, + V3D_PERFCNT_PSE_MEM_READS, + V3D_PERFCNT_TLB_MEM_READS, + V3D_PERFCNT_GMP_MEM_READS, + V3D_PERFCNT_PTB_W_MEM_WORDS, + V3D_PERFCNT_TLB_W_MEM_WORDS, + V3D_PERFCNT_PSE_R_MEM_WORDS, + V3D_PERFCNT_TLB_R_MEM_WORDS, + V3D_PERFCNT_TMU_MRU_HITS, + V3D_PERFCNT_COMPUTE_ACTIVE, + V3D_PERFCNT_NUM, +}; + +#define DRM_V3D_MAX_PERF_COUNTERS 32 + +struct drm_v3d_perfmon_create { + __u32 id; + __u32 ncounters; + __u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; +}; + +struct drm_v3d_perfmon_destroy { + __u32 id; +}; + +/* + * Returns the values of the performance counters tracked by this + * perfmon (as an array of ncounters u64 values). + * + * No implicit synchronization is performed, so the user has to + * guarantee that any jobs using this perfmon have already been + * completed (probably by blocking on the seqno returned by the + * last exec that used the perfmon). + */ +struct drm_v3d_perfmon_get_values { + __u32 id; + __u32 pad; + __u64 values_ptr; }; #if defined(__cplusplus) -- cgit v1.2.3 From 2d151d39073aff498358543801fca0f670fea981 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 18 Jul 2021 09:11:06 +0200 Subject: xfrm: Add possibility to set the default to block if we have no policy As the default we assume the traffic to pass, if we have no matching IPsec policy. With this patch, we have a possibility to change this default from allow to block. It can be configured via netlink. Each direction (input/output/forward) can be configured separately. With the default to block configuered, we need allow policies for all packet flows we accept. We do not use default policy lookup for the loopback device. v1->v2 - fix compiling when XFRM is disabled - Reported-by: kernel test robot Co-developed-by: Christian Langrock Signed-off-by: Christian Langrock Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ffc6a5391bb7..6e8095106192 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,6 +213,11 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_SETDEFAULT, +#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT + XFRM_MSG_GETDEFAULT, +#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT + XFRM_MSG_MAPPING, #define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX @@ -508,6 +513,11 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 +struct xfrm_userpolicy_default { + __u8 dirmask; + __u8 action; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 -- cgit v1.2.3 From db67f219fc9365a0c456666ed7c134d43ab0be8a Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:56 +0200 Subject: uapi: IPv6 IOAM headers definition This patch provides the IPv6 IOAM option header [1] as well as the IOAM Trace header [2]. An IOAM option must be 4n-aligned. Here is an overview of a Hop-by-Hop with an IOAM Trace option: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Next header | Hdr Ext Len | Padding | Padding | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Option Type | Opt Data Len | Reserved | IOAM Type | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Namespace-ID | NodeLen | Flags | RemainingLen| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | IOAM-Trace-Type | Reserved | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+<-+ | | | | node data [n] | | | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ D | | a | node data [n-1] | t | | a +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ ~ ... ~ S +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ p | | a | node data [1] | c | | e +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | | | node data [0] | | | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+<-+ The IOAM option header starts at "Option Type" and ends after "IOAM Type". The IOAM Trace header starts at "Namespace-ID" and ends after "IOAM-Trace-Type/Reserved". IOAM Type: either Pre-allocated Trace (=0), Incremental Trace (=1), Proof-of-Transit (=2) or Edge-to-Edge (=3). Note that both the Pre-allocated Trace and the Incremental Trace look the same. The two others are not implemented. Namespace-ID: IOAM namespace identifier, not to be confused with network namespaces. It adds further context to IOAM options and associated data, and allows devices which are IOAM capable to determine whether IOAM options must be processed or ignored. It can also be used by an operator to distinguish different operational domains or to identify different sets of devices. NodeLen: Length of data added by each node. It depends on the Trace Type. Flags: Only the Overflow (O) flag for now. The O flag is set by a transit node when there are not enough octets left to record its data. RemainingLen: Remaining free space to record data. IOAM-Trace-Type: Bit field where each bit corresponds to a specific kind of IOAM data. See [2] for a detailed list. [1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options [2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6.h | 123 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/uapi/linux/ioam6.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h new file mode 100644 index 000000000000..2177e4e49566 --- /dev/null +++ b/include/uapi/linux/ioam6.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_H +#define _UAPI_LINUX_IOAM6_H + +#include +#include + +/* + * IPv6 IOAM Option Header + */ +struct ioam6_hdr { + __u8 opt_type; + __u8 opt_len; + __u8 :8; /* reserved */ +#define IOAM6_TYPE_PREALLOC 0 + __u8 type; +} __attribute__((packed)); + +/* + * IOAM Trace Header + */ +struct ioam6_trace_hdr { + __be16 namespace_id; + +#if defined(__LITTLE_ENDIAN_BITFIELD) + + __u8 :1, /* unused */ + :1, /* unused */ + overflow:1, + nodelen:5; + + __u8 remlen:7, + :1; /* unused */ + + union { + __be32 type_be32; + + struct { + __u32 bit7:1, + bit6:1, + bit5:1, + bit4:1, + bit3:1, + bit2:1, + bit1:1, + bit0:1, + bit15:1, /* unused */ + bit14:1, /* unused */ + bit13:1, /* unused */ + bit12:1, /* unused */ + bit11:1, + bit10:1, + bit9:1, + bit8:1, + bit23:1, /* reserved */ + bit22:1, + bit21:1, /* unused */ + bit20:1, /* unused */ + bit19:1, /* unused */ + bit18:1, /* unused */ + bit17:1, /* unused */ + bit16:1, /* unused */ + :8; /* reserved */ + } type; + }; + +#elif defined(__BIG_ENDIAN_BITFIELD) + + __u8 nodelen:5, + overflow:1, + :1, /* unused */ + :1; /* unused */ + + __u8 :1, /* unused */ + remlen:7; + + union { + __be32 type_be32; + + struct { + __u32 bit0:1, + bit1:1, + bit2:1, + bit3:1, + bit4:1, + bit5:1, + bit6:1, + bit7:1, + bit8:1, + bit9:1, + bit10:1, + bit11:1, + bit12:1, /* unused */ + bit13:1, /* unused */ + bit14:1, /* unused */ + bit15:1, /* unused */ + bit16:1, /* unused */ + bit17:1, /* unused */ + bit18:1, /* unused */ + bit19:1, /* unused */ + bit20:1, /* unused */ + bit21:1, /* unused */ + bit22:1, + bit23:1, /* reserved */ + :8; /* reserved */ + } type; + }; + +#else +#error "Please fix " +#endif + + __u8 data[0]; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_IOAM6_H */ -- cgit v1.2.3 From 9ee11f0fff205b4b3df9750bff5e94f97c71b6a0 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:57 +0200 Subject: ipv6: ioam: Data plane support for Pre-allocated Trace Implement support for processing the IOAM Pre-allocated Trace with IPv6, see [1] and [2]. Introduce a new IPv6 Hop-by-Hop TLV option, see IANA [3]. A new per-interface sysctl is introduced. The value is a boolean to accept (=1) or ignore (=0, by default) IPv6 IOAM options on ingress for an interface: - net.ipv6.conf.XXX.ioam6_enabled Two other sysctls are introduced to define IOAM IDs, represented by an integer. They are respectively per-namespace and per-interface: - net.ipv6.ioam6_id - net.ipv6.conf.XXX.ioam6_id The value of the first one represents the IOAM ID of the node itself (u32; max and default value = U32_MAX>>8, due to hop limit concatenation) while the other represents the IOAM ID of an interface (u16; max and default value = U16_MAX). Each "ioam6_id" sysctl has a "_wide" equivalent: - net.ipv6.ioam6_id_wide - net.ipv6.conf.XXX.ioam6_id_wide The value of the first one represents the wide IOAM ID of the node itself (u64; max and default value = U64_MAX>>8, due to hop limit concatenation) while the other represents the wide IOAM ID of an interface (u32; max and default value = U32_MAX). The use of short and wide equivalents is not exclusive, a deployment could choose to leverage both. For example, net.ipv6.conf.XXX.ioam6_id (short format) could be an identifier for a physical interface, whereas net.ipv6.conf.XXX.ioam6_id_wide (wide format) could be an identifier for a logical sub-interface. Documentation about new sysctls is provided at the end of this patchset. Two relativistic hash tables are used: one for IOAM namespaces, the other for IOAM schemas. A namespace can only have a single active schema and a schema can only be attached to a single namespace (1:1 relationship). [1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options [2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data [3] https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2 Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/in6.h | 1 + include/uapi/linux/ioam6.h | 9 +++++++++ include/uapi/linux/ipv6.h | 3 +++ 3 files changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5ad396a57eb3..c4c53a9ab959 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,6 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index 2177e4e49566..23ba6e85582f 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -12,6 +12,15 @@ #include #include +#define IOAM6_U16_UNAVAILABLE U16_MAX +#define IOAM6_U32_UNAVAILABLE U32_MAX +#define IOAM6_U64_UNAVAILABLE U64_MAX + +#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE +#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE + /* * IPv6 IOAM Option Header */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 70603775fe91..b243a53fa985 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -190,6 +190,9 @@ enum { DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, DEVCONF_RA_DEFRTR_METRIC, + DEVCONF_IOAM6_ENABLED, + DEVCONF_IOAM6_ID, + DEVCONF_IOAM6_ID_WIDE, DEVCONF_MAX }; -- cgit v1.2.3 From 8c6f6fa6772696be0c047a711858084b38763728 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:58 +0200 Subject: ipv6: ioam: IOAM Generic Netlink API Add Generic Netlink commands to allow userspace to configure IOAM namespaces and schemas. The target is iproute2 and the patch is ready. It will be posted as soon as this patchset is merged. Here is an overview: $ ip ioam Usage: ip ioam { COMMAND | help } ip ioam namespace show ip ioam namespace add ID [ data DATA32 ] [ wide DATA64 ] ip ioam namespace del ID ip ioam schema show ip ioam schema add ID DATA ip ioam schema del ID ip ioam namespace set ID schema { ID | none } Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6_genl.h | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/uapi/linux/ioam6_genl.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h new file mode 100644 index 000000000000..ca4b22833754 --- /dev/null +++ b/include/uapi/linux/ioam6_genl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_GENL_H +#define _UAPI_LINUX_IOAM6_GENL_H + +#define IOAM6_GENL_NAME "IOAM6" +#define IOAM6_GENL_VERSION 0x1 + +enum { + IOAM6_ATTR_UNSPEC, + + IOAM6_ATTR_NS_ID, /* u16 */ + IOAM6_ATTR_NS_DATA, /* u32 */ + IOAM6_ATTR_NS_DATA_WIDE,/* u64 */ + +#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4) + IOAM6_ATTR_SC_ID, /* u32 */ + IOAM6_ATTR_SC_DATA, /* Binary */ + IOAM6_ATTR_SC_NONE, /* Flag */ + + IOAM6_ATTR_PAD, + + __IOAM6_ATTR_MAX, +}; + +#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1) + +enum { + IOAM6_CMD_UNSPEC, + + IOAM6_CMD_ADD_NAMESPACE, + IOAM6_CMD_DEL_NAMESPACE, + IOAM6_CMD_DUMP_NAMESPACES, + + IOAM6_CMD_ADD_SCHEMA, + IOAM6_CMD_DEL_SCHEMA, + IOAM6_CMD_DUMP_SCHEMAS, + + IOAM6_CMD_NS_SET_SCHEMA, + + __IOAM6_CMD_MAX, +}; + +#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_GENL_H */ -- cgit v1.2.3 From 3edede08ff37c6a9370510508d5eeb54890baf47 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:59 +0200 Subject: ipv6: ioam: Support for IOAM injection with lwtunnels Add support for the IOAM inline insertion (only for the host-to-host use case) which is per-route configured with lightweight tunnels. The target is iproute2 and the patch is ready. It will be posted as soon as this patchset is merged. Here is an overview: $ ip -6 ro ad fc00::1/128 encap ioam6 trace type 0x800000 ns 1 size 12 dev eth0 This example configures an IOAM Pre-allocated Trace option attached to the fc00::1/128 prefix. The IOAM namespace (ns) is 1, the size of the pre-allocated trace data block is 12 octets (size) and only the first IOAM data (bit 0: hop_limit + node id) is included in the trace (type) represented as a bitfield. The reason why the in-transit (IPv6-in-IPv6 encapsulation) use case is not implemented is explained on the patchset cover. Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6.h | 1 + include/uapi/linux/ioam6_iptunnel.h | 20 ++++++++++++++++++++ include/uapi/linux/lwtunnel.h | 1 + 3 files changed, 22 insertions(+) create mode 100644 include/uapi/linux/ioam6_iptunnel.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index 23ba6e85582f..ac4de376f0ce 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -126,6 +126,7 @@ struct ioam6_trace_hdr { #error "Please fix " #endif +#define IOAM6_TRACE_DATA_SIZE_MAX 244 __u8 data[0]; } __attribute__((packed)); diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..bae14636a8c8 --- /dev/null +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H +#define _UAPI_LINUX_IOAM6_IPTUNNEL_H + +enum { + IOAM6_IPTUNNEL_UNSPEC, + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, +}; + +#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 568a4303ccce..2e206919125c 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -14,6 +14,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, + LWTUNNEL_ENCAP_IOAM6, __LWTUNNEL_ENCAP_MAX, }; -- cgit v1.2.3 From 7961c5b60f23dff5d82a523f9aeb8ebf34cf9926 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 14 Jul 2021 14:28:33 +0200 Subject: drm/i915: Add TTM offset argument to mmap. The FIXED mapping is only used for ttm, and tells userspace that the mapping type is pre-defined. This disables the other type of mmap offsets when discrete memory is used, so fix the selftests as well. Document the struct as well, so it shows up in docbook. Cc: Jason Ekstrand Reviewed-by: Daniel Vetter Reviewed-by: Jason Ekstrand Signed-off-by: Maarten Lankhorst [mauld: Included minor fixes from the review comments] Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210714122833.766586-1-maarten.lankhorst@linux.intel.com --- include/uapi/drm/i915_drm.h | 47 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e20eeeca7a1c..0aea82657cdc 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -849,31 +849,56 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: + * + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. */ -- cgit v1.2.3 From 81340cf3bddded4fe23a55148152e6d5e2460351 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:36 +0100 Subject: drm/i915/uapi: reject set_domain for discrete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPU domain should be static for discrete, and on DG1 we don't need any flushing since everything is already coherent, so really all this does is an object wait, for which we have an ioctl. Longer term the desired caching should be an immutable creation time property for the BO, which can be set with something like gem_create_ext. One other user is iris + userptr, which uses the set_domain to probe all the pages to check if the GUP succeeds, however we now have a PROBE flag for this purpose. v2: add some more kernel doc, also add the implicit rules with caching Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Acked-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-5-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0aea82657cdc..975087553ea0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -923,6 +923,25 @@ struct drm_i915_gem_mmap_offset { * - I915_GEM_DOMAIN_GTT: Mappable aperture domain * * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. */ struct drm_i915_gem_set_domain { /** @handle: Handle for the object. */ -- cgit v1.2.3 From 90c7c70a0a909de87572b9ff81aa35acd49f6473 Mon Sep 17 00:00:00 2001 From: Normunds Rieksts Date: Thu, 1 Jul 2021 18:07:09 +0100 Subject: drm/fourcc: Add modifier definitions for Arm Fixed Rate Compression Arm Fixed Rate Compression (AFRC) is a proprietary fixed rate image compression protocol and format. It is designed to provide guaranteed bandwidth and memory footprint reductions in graphics and media use-cases. This patch aims to add modifier definitions for describing AFRC. Signed-off-by: Normunds Rieksts Reviewed-by: Liviu Dudau Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20210701170709.39922-1-normunds.rieksts@arm.com --- include/uapi/drm/drm_fourcc.h | 109 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index f7156322aba5..9f4bb4a6f358 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -900,9 +900,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* * The top 4 bits (out of the 56 bits alloted for specifying vendor specific - * modifiers) denote the category for modifiers. Currently we have only two - * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen - * different categories. + * modifiers) denote the category for modifiers. Currently we have three + * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of + * sixteen different categories. */ #define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ fourcc_mod_code(ARM, ((__u64)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) @@ -1017,6 +1017,109 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) */ #define AFBC_FORMAT_MOD_USM (1ULL << 12) +/* + * Arm Fixed-Rate Compression (AFRC) modifiers + * + * AFRC is a proprietary fixed rate image compression protocol and format, + * designed to provide guaranteed bandwidth and memory footprint + * reductions in graphics and media use-cases. + * + * AFRC buffers consist of one or more planes, with the same components + * and meaning as an uncompressed buffer using the same pixel format. + * + * Within each plane, the pixel/luma/chroma values are grouped into + * "coding unit" blocks which are individually compressed to a + * fixed size (in bytes). All coding units within a given plane of a buffer + * store the same number of values, and have the same compressed size. + * + * The coding unit size is configurable, allowing different rates of compression. + * + * The start of each AFRC buffer plane must be aligned to an alignment granule which + * depends on the coding unit size. + * + * Coding Unit Size Plane Alignment + * ---------------- --------------- + * 16 bytes 1024 bytes + * 24 bytes 512 bytes + * 32 bytes 2048 bytes + * + * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned + * to a multiple of the paging tile dimensions. + * The dimensions of each paging tile depend on whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. + * + * Layout Paging Tile Width Paging Tile Height + * ------ ----------------- ------------------ + * SCAN 16 coding units 4 coding units + * ROT 8 coding units 8 coding units + * + * The dimensions of each coding unit depend on the number of components + * in the compressed plane and whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. + * + * Number of Components in Plane Layout Coding Unit Width Coding Unit Height + * ----------------------------- --------- ----------------- ------------------ + * 1 SCAN 16 samples 4 samples + * Example: 16x4 luma samples in a 'Y' plane + * 16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 1 ROT 8 samples 8 samples + * Example: 8x8 luma samples in a 'Y' plane + * 8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 2 DONT CARE 8 samples 4 samples + * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 3 DONT CARE 4 samples 4 samples + * Example: 4x4 pixels in an RGB buffer without alpha + * ----------------------------- --------- ----------------- ------------------ + * 4 DONT CARE 4 samples 4 samples + * Example: 4x4 pixels in an RGB buffer with alpha + */ + +#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02 + +#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode) + +/* + * AFRC coding unit size modifier. + * + * Indicates the number of bytes used to store each compressed coding unit for + * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance + * is the same for both Cb and Cr, which may be stored in separate planes. + * + * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store + * each compressed coding unit in the first plane of the buffer. For RGBA buffers + * this is the only plane, while for semi-planar and fully-planar YUV buffers, + * this corresponds to the luma plane. + * + * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store + * each compressed coding unit in the second and third planes in the buffer. + * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s). + * + * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified + * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero. + * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and + * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified. + */ +#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf +#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL) + +#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size) +#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4) + +/* + * AFRC scanline memory layout. + * + * Indicates if the buffer uses the scanline-optimised layout + * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout. + * The memory layout is the same for all planes. + */ +#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8) + /* * Arm 16x16 Block U-Interleaved modifier * -- cgit v1.2.3 From 50ae81305c7a3ee802f0a1988503ce913c79cd6e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2021 11:25:22 -0500 Subject: signal: Verify the alignment and size of siginfo_t Update the static assertions about siginfo_t to also describe it's alignment and size. While investigating if it was possible to add a 64bit field into siginfo_t[1] it became apparent that the alignment of siginfo_t is as much a part of the ABI as the size of the structure. If the alignment changes siginfo_t when embedded in another structure can move to a different offset. Which is not acceptable from an ABI structure. So document that fact and add static assertions to notify developers if they change change the alignment by accident. [1] https://lkml.kernel.org/r/YJEZdhe6JGFNYlum@elver.google.com Acked-by: Marco Elver v1: https://lkml.kernel.org/r/20210505141101.11519-4-ebiederm@xmission.co Link: https://lkml.kernel.org/r/875yxaxmyl.fsf_-_@disp2133 Signed-off-by: "Eric W. Biederman" --- include/uapi/asm-generic/siginfo.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 5a3c221f4c9d..3ba180f550d7 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -29,6 +29,11 @@ typedef union sigval { #define __ARCH_SI_ATTRIBUTES #endif +/* + * Be careful when extending this union. On 32bit siginfo_t is 32bit + * aligned. Which means that a 64bit field or any other field that + * would increase the alignment of siginfo_t will break the ABI. + */ union __sifields { /* kill() */ struct { -- cgit v1.2.3 From 9ffb14ef61bab83fa818736bf3e7e6b6e182e8e2 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 15 Jul 2021 13:07:13 +0300 Subject: move_mount: allow to add a mount into an existing group Previously a sharing group (shared and master ids pair) can be only inherited when mount is created via bindmount. This patch adds an ability to add an existing private mount into an existing sharing group. With this functionality one can first create the desired mount tree from only private mounts (without the need to care about undesired mount propagation or mount creation order implied by sharing group dependencies), and next then setup any desired mount sharing between those mounts in tree as needed. This allows CRIU to restore any set of mount namespaces, mount trees and sharing group trees for a container. We have many issues with restoring mounts in CRIU related to sharing groups and propagation: - reverse sharing groups vs mount tree order requires complex mounts reordering which mostly implies also using some temporary mounts (please see https://lkml.org/lkml/2021/3/23/569 for more info) - mount() syscall creates tons of mounts due to propagation - mount re-parenting due to propagation - "Mount Trap" due to propagation - "Non Uniform" propagation, meaning that with different tricks with mount order and temporary children-"lock" mounts one can create mount trees which can't be restored without those tricks (see https://www.linuxplumbersconf.org/event/7/contributions/640/) With this new functionality we can resolve all the problems with propagation at once. Link: https://lore.kernel.org/r/20210715100714.120228-1-ptikhomirov@virtuozzo.com Cc: Eric W. Biederman Cc: Alexander Viro Cc: Christian Brauner Cc: Mattias Nissler Cc: Aleksa Sarai Cc: Andrei Vagin Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: lkml Co-developed-by: Andrei Vagin Acked-by: Christian Brauner Signed-off-by: Pavel Tikhomirov Signed-off-by: Andrei Vagin Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index dd7a166fdf9c..4d93967f8aea 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -73,7 +73,8 @@ #define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ #define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#define MOVE_MOUNT__MASK 0x00000077 +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT__MASK 0x00000177 /* * fsopen() flags. -- cgit v1.2.3 From d7aff291d069c4418285f3c8ee27b0ff67ce5998 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 26 Jun 2021 06:11:51 +0200 Subject: serial: 8250: Define RX trigger levels for OxSemi 950 devices Oxford Semiconductor 950 serial port devices have a 128-byte FIFO and in the enhanced (650) mode, which we select in `autoconfig_has_efr' with the ECB bit set in the EFR register, they support the receive interrupt trigger level selectable with FCR bits 7:6 from the set of 16, 32, 112, 120. This applies to the original OX16C950 discrete UART[1] as well as 950 cores embedded into more complex devices. For these devices we set the default to 112, which sets an excessively high level of 112 or 7/8 of the FIFO capacity, unlike with other port types where we choose at most 1/2 of their respective FIFO capacities. Additionally we don't make the trigger level configurable. Consequently frequent input overruns happen with high bit rates where hardware flow control cannot be used (e.g. terminal applications) even with otherwise highly-performant systems. Lower the default receive interrupt trigger level to 32 then, and make it configurable. Document the trigger levels along with other port types, including the set of 16, 32, 64, 112 for the transmit interrupt as well[2]. References: [1] "OX16C950 rev B High Performance UART with 128 byte FIFOs", Oxford Semiconductor, Inc., DS-0031, Sep 05, Table 10: "Receiver Trigger Levels", p. 22 [2] same, Table 9: "Transmit Interrupt Trigger Levels", p. 22 Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260608480.37803@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_reg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index be07b5470f4b..f51bc8f36813 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -62,6 +62,7 @@ * ST16C654: 8 16 56 60 8 16 32 56 PORT_16654 * TI16C750: 1 16 32 56 xx xx xx xx PORT_16750 * TI16C752: 8 16 56 60 8 16 32 56 + * OX16C950: 16 32 112 120 16 32 64 112 PORT_16C950 * Tegra: 1 4 8 14 16 8 4 1 PORT_TEGRA */ #define UART_FCR_R_TRIG_00 0x00 -- cgit v1.2.3 From e4252cb66637b846b916cca7c2cdb4ed22ab2fc3 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Fri, 23 Jul 2021 10:24:12 -0400 Subject: openvswitch: update kdoc OVS_DP_ATTR_PER_CPU_PIDS Signed-off-by: Mark Gray Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 6571b57b2268..0e436a3755f1 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,7 +70,7 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. - * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. -- cgit v1.2.3 From 784dcfa56e0453bb197601ba0b8196f6f892ebcb Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Fri, 23 Jul 2021 10:24:13 -0400 Subject: openvswitch: fix alignment issues Signed-off-by: Mark Gray Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 0e436a3755f1..150bcff49b1c 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -89,8 +89,8 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, - OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu - * dispatch mode + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in + * per-cpu dispatch mode */ __OVS_DP_ATTR_MAX }; -- cgit v1.2.3 From 17ce9c61c71cbc0d7e6ba0a9f34c724fb316a31b Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 26 Jul 2021 07:50:32 +0000 Subject: drm: document DRM_IOCTL_MODE_RMFB Since there's no struct to attach the docs to, document the IOCTL definition. Signed-off-by: Simon Ser Reviewed-by: Pekka Paalanen Reviewed-by: Daniel Vetter Cc: Leandro Ribeiro Link: https://patchwork.freedesktop.org/patch/msgid/ephVkof3uGu2RpOdBbrHE3qF98zBfIBRzXe4Vyoboh0@cp4-web-034.plabs.ch --- include/uapi/drm/drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index e1f49dd241f7..3b810b53ba8b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1050,6 +1050,16 @@ extern "C" { #define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) -- cgit v1.2.3 From ee242ca704d386991d7ece0c46134e211d52412b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:47 -0700 Subject: drm/i915/guc: Implement GuC priority management Implement a simple static mapping algorithm of the i915 priority levels (int, -1k to 1k exposed to user) to the 4 GuC levels. Mapping is as follows: i915 level < 0 -> GuC low level (3) i915 level == 0 -> GuC normal level (2) i915 level < INT_MAX -> GuC high level (1) i915 level == INT_MAX -> GuC highest level (0) We believe this mapping should cover the UMD use cases (3 distinct user levels + 1 kernel level). In addition to static mapping, a simple counter system is attached to each context tracking the number of requests inflight on the context at each level. This is needed as the GuC levels are per context while in the i915 levels are per request. v2: (Daniele) - Add BUILD_BUG_ON to enforce ordering of priority levels - Add missing lockdep to guc_prio_fini - Check for return before setting context registered flag - Map DISPLAY priority or higher to highest guc prio - Update comment for guc_prio Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-33-matthew.brost@intel.com --- include/uapi/drm/i915_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 975087553ea0..7f13d241417f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 -- cgit v1.2.3 From 56af5e749f20c3a540310c207dcc373f4f09156e Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 27 Jul 2021 18:33:15 -0700 Subject: net/sched: act_skbmod: Add SKBMOD_F_ECN option support Currently, when doing rate limiting using the tc-police(8) action, the easiest way is to simply drop the packets which exceed or conform the configured bandwidth limit. Add a new option to tc-skbmod(8), so that users may use the ECN [1] extension to explicitly inform the receiver about the congestion instead of dropping packets "on the floor". The 2 least significant bits of the Traffic Class field in IPv4 and IPv6 headers are used to represent different ECN states [2]: 0b00: "Non ECN-Capable Transport", Non-ECT 0b10: "ECN Capable Transport", ECT(0) 0b01: "ECN Capable Transport", ECT(1) 0b11: "Congestion Encountered", CE As an example: $ tc filter add dev eth0 parent 1: protocol ip prio 10 \ matchall action skbmod ecn Doing the above marks all ECT(0) and ECT(1) packets as CE. It does NOT affect Non-ECT or non-IP packets. In the tc-police scenario mentioned above, users may pipe a tc-police action and a tc-skbmod "ecn" action together to achieve ECN-based rate limiting. For TCP connections, upon receiving a CE packet, the receiver will respond with an ECE packet, asking the sender to reduce their congestion window. However ECN also works with other L4 protocols e.g. DCCP and SCTP [2], and our implementation does not touch or care about L4 headers. The updated tc-skbmod SYNOPSIS looks like the following: tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ... Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod command. Trying to use more than one of them at a time is considered undefined behavior; pipe multiple tc-skbmod commands together instead. "set" and "swap" only affect Ethernet packets, while "ecn" only affects IPv{4,6} packets. It is also worth mentioning that, in theory, the same effect could be achieved by piping a "police" action and a "bpf" action using the bpf_skb_ecn_set_ce() helper, but this requires eBPF programming from the user, thus impractical. Depends on patch "net/sched: act_skbmod: Skip non-Ethernet packets". [1] https://datatracker.ietf.org/doc/html/rfc3168 [2] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_skbmod.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index c525b3503797..af6ef2cfbf3d 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -17,6 +17,7 @@ #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 #define SKBMOD_F_SWAPMAC 0x8 +#define SKBMOD_F_ECN 0x10 struct tc_skbmod { tc_gen; -- cgit v1.2.3 From 125d10373ad991888c9e94d2da49bcc5ccba2127 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Jul 2021 13:42:15 -0700 Subject: dmanegine: idxd: add software command status Enabling device and wq returns standard errno and that does not provide enough details to indicate what exactly failed. The hardware command status is only 8bits. Expand the command status to 32bits and use the upper 16 bits to define software errors to provide more details on the exact failure. Bit 31 will be used to indicate the error is software set as the driver is using some of the spec defined hardware error as well. Cc: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162681373579.1968485.5891788397526827892.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..1c0175aa0e42 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -9,6 +9,29 @@ #include #endif +/* Driver command error status */ +enum idxd_scmd_stat { + IDXD_SCMD_DEV_ENABLED = 0x80000010, + IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020, + IDXD_SCMD_WQ_ENABLED = 0x80000021, + IDXD_SCMD_DEV_DMA_ERR = 0x80020000, + IDXD_SCMD_WQ_NO_GRP = 0x80030000, + IDXD_SCMD_WQ_NO_NAME = 0x80040000, + IDXD_SCMD_WQ_NO_SVM = 0x80050000, + IDXD_SCMD_WQ_NO_THRESH = 0x80060000, + IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000, + IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000, + IDXD_SCMD_PERCPU_ERR = 0x80090000, + IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000, + IDXD_SCMD_CDEV_ERR = 0x800b0000, + IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, + IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, + IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, +}; + +#define IDXD_SCMD_SOFTERR_MASK 0x80000000 +#define IDXD_SCMD_SOFTERR_SHIFT 16 + /* Descriptor flags */ #define IDXD_OP_FLAG_FENCE 0x0001 #define IDXD_OP_FLAG_BOF 0x0002 -- cgit v1.2.3 From fc40e5e10c3bcc36f3f765f0d9fae0a13efc7935 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 27 Jul 2021 18:06:17 -0700 Subject: drm/msm: Utilize gpu scheduler priorities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm/scheduler provides additional prioritization on top of that provided by however many number of ringbuffers (each with their own priority level) is supported on a given generation. Expose the additional levels of priority to userspace and map the userspace priority back to ring (first level of priority) and schedular priority (additional priority levels within the ring). Signed-off-by: Rob Clark Acked-by: Christian König Link: https://lore.kernel.org/r/20210728010632.2633470-13-robdclark@gmail.com Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index f075851021c3..6b8fffc28a50 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -73,11 +73,19 @@ struct drm_msm_timespec { #define MSM_PARAM_MAX_FREQ 0x04 #define MSM_PARAM_TIMESTAMP 0x05 #define MSM_PARAM_GMEM_BASE 0x06 -#define MSM_PARAM_NR_RINGS 0x07 +#define MSM_PARAM_PRIORITIES 0x07 /* The # of priority levels */ #define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */ #define MSM_PARAM_FAULTS 0x09 #define MSM_PARAM_SUSPENDS 0x0a +/* For backwards compat. The original support for preemption was based on + * a single ring per priority level so # of priority levels equals the # + * of rings. With drm/scheduler providing additional levels of priority, + * the number of priorities is greater than the # of rings. The param is + * renamed to better reflect this. + */ +#define MSM_PARAM_NR_RINGS MSM_PARAM_PRIORITIES + struct drm_msm_param { __u32 pipe; /* in, MSM_PIPE_x */ __u32 param; /* in, MSM_PARAM_x */ @@ -304,6 +312,10 @@ struct drm_msm_gem_madvise { #define MSM_SUBMITQUEUE_FLAGS (0) +/* + * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, + * a lower numeric value is higher priority. + */ struct drm_msm_submitqueue { __u32 flags; /* in, MSM_SUBMITQUEUE_x */ __u32 prio; /* in, Priority level */ -- cgit v1.2.3 From 433c38f40f6a81cf3988b9372f2983912737f322 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 27 Jul 2021 13:52:56 -0700 Subject: arm64: mte: change ASYNC and SYNC TCF settings into bitfields Allow the user program to specify both ASYNC and SYNC TCF modes by repurposing the existing constants as bitfields. This will allow the kernel to select one of the modes on behalf of the user program. With this patch the kernel will always select async mode, but a subsequent patch will make this configurable. Link: https://linux-review.googlesource.com/id/Icc5923c85a8ea284588cc399ae74fd19ec291230 Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210727205300.2554659-3-pcc@google.com Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- include/uapi/linux/prctl.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 967d9c55323d..89de78a14b9b 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -234,14 +234,15 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_NONE 0 +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From 5d8dbb7fb82b8661c16d496644b931c0e2e3a12e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 28 Jul 2021 19:38:18 +0300 Subject: net: xfrm: fix shift-out-of-bounce We need to check up->dirmask to avoid shift-out-of-bounce bug, since up->dirmask comes from userspace. Also, added XFRM_USERPOLICY_DIRMASK_MAX constant to uapi to inform user-space that up->dirmask has maximum possible value Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Reported-and-tested-by: syzbot+9cd5837a045bbee5b810@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 6e8095106192..b96c1ea7166d 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -514,6 +514,7 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_INBOUND 2 struct xfrm_userpolicy_default { +#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) __u8 dirmask; __u8 action; }; -- cgit v1.2.3 From bc49d8169aa72295104f1558830c568efb946315 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:39 +0800 Subject: mctp: Add MCTP base Add basic Kconfig, an initial (empty) af_mctp source object, and {AF,PF}_MCTP definitions, and the required definitions for a new protocol type. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/uapi/linux/mctp.h (limited to 'include/uapi') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h new file mode 100644 index 000000000000..2640a589c14c --- /dev/null +++ b/include/uapi/linux/mctp.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Management Component Transport Protocol (MCTP) + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __UAPI_MCTP_H +#define __UAPI_MCTP_H + +struct sockaddr_mctp { +}; + +#endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 60fc63981693f807baa0e404104dedea0e8b4e61 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:42 +0800 Subject: mctp: Add sockaddr_mctp to uapi This change introduces the user-visible MCTP header, containing the protocol-specific addressing definitions. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 2640a589c14c..52b54d13f385 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -9,7 +9,28 @@ #ifndef __UAPI_MCTP_H #define __UAPI_MCTP_H +#include + +typedef __u8 mctp_eid_t; + +struct mctp_addr { + mctp_eid_t s_addr; +}; + struct sockaddr_mctp { + unsigned short int smctp_family; + int smctp_network; + struct mctp_addr smctp_addr; + __u8 smctp_type; + __u8 smctp_tag; }; +#define MCTP_NET_ANY 0x0 + +#define MCTP_ADDR_NULL 0x00 +#define MCTP_ADDR_ANY 0xff + +#define MCTP_TAG_MASK 0x07 +#define MCTP_TAG_OWNER 0x08 + #endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 4b2e69305cbbc7c32ecbd946110b505c4ff6071a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:43 +0800 Subject: mctp: Add initial driver infrastructure Add an empty drivers/net/mctp/, for future interface drivers. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index c3cc5a9e5eaf..4783af9fe520 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -54,6 +54,7 @@ #define ARPHRD_X25 271 /* CCITT X.25 */ #define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */ #define ARPHRD_CAN 280 /* Controller Area Network */ +#define ARPHRD_MCTP 290 #define ARPHRD_PPP 512 #define ARPHRD_CISCO 513 /* Cisco HDLC */ #define ARPHRD_HDLC ARPHRD_CISCO -- cgit v1.2.3 From 583be982d93479ea3d85091b0fd0b01201ede87d Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:44 +0800 Subject: mctp: Add device handling and netlink interface This change adds the infrastructure for managing MCTP netdevices; we add a pointer to the AF_MCTP-specific data to struct netdevice, and hook up the rtnetlink operations for adding and removing addresses. Includes changes from Matt Johnston . Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/if_link.h | 10 ++++++++++ include/uapi/linux/mctp.h | 1 + 3 files changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index a0b637911d3c..5f589c7a8382 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -151,6 +151,9 @@ #define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and * aggregation protocol */ +#define ETH_P_MCTP 0x00FA /* Management component transport + * protocol packets + */ /* * This is an Ethernet frame header. diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4882e81514b6..49b22afab78f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1260,4 +1260,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 52b54d13f385..a9d8edb3402b 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -26,6 +26,7 @@ struct sockaddr_mctp { }; #define MCTP_NET_ANY 0x0 +#define MCTP_NET_DEFAULT 0x0 #define MCTP_ADDR_NULL 0x00 #define MCTP_ADDR_ANY 0xff -- cgit v1.2.3 From 03f2bbc4ee57ca53b2fa1d9caabc5006e0b8f375 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Jul 2021 10:20:52 +0800 Subject: mctp: Allow per-netns default networks Currently we have a compile-time default network (MCTP_INITIAL_DEFAULT_NET). This change introduces a default_net field on the net namespace, allowing future configuration for new interfaces. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index a9d8edb3402b..52b54d13f385 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -26,7 +26,6 @@ struct sockaddr_mctp { }; #define MCTP_NET_ANY 0x0 -#define MCTP_NET_DEFAULT 0x0 #define MCTP_ADDR_NULL 0x00 #define MCTP_ADDR_ANY 0xff -- cgit v1.2.3 From 695176bfe5dec2051f950bdac0ae0b21e29e6de3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 29 Jul 2021 16:12:14 -0700 Subject: net_sched: refactor TC action init API TC action ->init() API has 10 parameters, it becomes harder to read. Some of them are just boolean and can be replaced by flags. Similarly for the internal API tcf_action_init() and tcf_exts_validate(). This patch converts them to flags and fold them into the upper 16 bits of "flags", whose lower 16 bits are still reserved for user-space. More specifically, the following kernel flags are introduced: TCA_ACT_FLAGS_POLICE replace 'name' in a few contexts, to distinguish whether it is compatible with policer. TCA_ACT_FLAGS_BIND replaces 'bind', to indicate whether this action is bound to a filter. TCA_ACT_FLAGS_REPLACE replaces 'ovr' in most contexts, means we are replacing an existing action. TCA_ACT_FLAGS_NO_RTNL replaces 'rtnl_held' but has the opposite meaning, because we still hold RTNL in most cases. The only user-space flag TCA_ACT_FLAGS_NO_PERCPU_STATS is untouched and still stored as before. I have tested this patch with tdc and I do not see any failure related to this patch. Tested-by: Vlad Buslov Acked-by: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 025c40fef93d..6836ccb9c45d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,6 +22,7 @@ enum { __TCA_ACT_MAX }; +/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for * actions stats. */ -- cgit v1.2.3 From 6e5b47a4f1dde38d42b054cc6d16b6840de08bd2 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 2 Aug 2021 07:28:35 +0000 Subject: drm: document drm_mode_get_property It's not obvious what the fields mean and how they should be used. The most important detail is the link to drm_property.flags, which describes how property types work. v2: document enum drm_mode_property_enum, add ref to "Modeset Base Object Abstraction" (Daniel) Signed-off-by: Simon Ser Acked-by: Pekka Paalanen Acked-by: Daniel Vetter Cc: Leandro Ribeiro Link: https://patchwork.freedesktop.org/patch/msgid/20210802072826.500078-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 60 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 98bf130feda5..90c55383f1ee 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -541,22 +541,74 @@ struct drm_mode_get_connector { */ #define DRM_MODE_PROP_ATOMIC 0x80000000 +/** + * struct drm_mode_property_enum - Description for an enum/bitfield entry. + * @value: numeric value for this enum entry. + * @name: symbolic name for this enum entry. + * + * See struct drm_property_enum for details. + */ struct drm_mode_property_enum { __u64 value; char name[DRM_PROP_NAME_LEN]; }; +/** + * struct drm_mode_get_property - Get property metadata. + * + * User-space can perform a GETPROPERTY ioctl to retrieve information about a + * property. The same property may be attached to multiple objects, see + * "Modeset Base Object Abstraction". + * + * The meaning of the @values_ptr field changes depending on the property type. + * See &drm_property.flags for more details. + * + * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the + * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For + * backwards compatibility, the kernel will always set @count_enum_blobs to + * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must + * ignore these two fields if the property has a different type. + * + * User-space is expected to retrieve values and enums by performing this ioctl + * at least twice: the first time to retrieve the number of elements, the + * second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_values and @count_enum_blobs + * to zero, then call the ioctl. @count_values will be updated with the number + * of elements. If the property has the type &DRM_MODE_PROP_ENUM or + * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. + * + * To retrieve the elements themselves, allocate an array for @values_ptr and + * set @count_values to its capacity. If the property has the type + * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for + * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl + * again will fill the arrays. + */ struct drm_mode_get_property { - __u64 values_ptr; /* values and blob lengths */ - __u64 enum_blob_ptr; /* enum and blob id ptrs */ + /** @values_ptr: Pointer to a ``__u64`` array. */ + __u64 values_ptr; + /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ + __u64 enum_blob_ptr; + /** + * @prop_id: Object ID of the property which should be retrieved. Set + * by the caller. + */ __u32 prop_id; + /** + * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for + * a definition of the flags. + */ __u32 flags; + /** + * @name: Symbolic property name. User-space should use this field to + * recognize properties. + */ char name[DRM_PROP_NAME_LEN]; + /** @count_values: Number of elements in @values_ptr. */ __u32 count_values; - /* This is only used to count enum values, not blobs. The _blobs is - * simply because of a historical reason, i.e. backwards compat. */ + /** @count_enum_blobs: Number of elements in @enum_blob_ptr. */ __u32 count_enum_blobs; }; -- cgit v1.2.3 From 2d3e5caf96b9449af951e63476657acd759c1a30 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 31 Jul 2021 12:08:30 -0500 Subject: net/ipv4: Replace one-element array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Use an anonymous union with a couple of anonymous structs in order to keep userspace unchanged: $ pahole -C ip_msfilter net/ipv4/ip_sockglue.o struct ip_msfilter { union { struct { __be32 imsf_multiaddr_aux; /* 0 4 */ __be32 imsf_interface_aux; /* 4 4 */ __u32 imsf_fmode_aux; /* 8 4 */ __u32 imsf_numsrc_aux; /* 12 4 */ __be32 imsf_slist[1]; /* 16 4 */ }; /* 0 20 */ struct { __be32 imsf_multiaddr; /* 0 4 */ __be32 imsf_interface; /* 4 4 */ __u32 imsf_fmode; /* 8 4 */ __u32 imsf_numsrc; /* 12 4 */ __be32 imsf_slist_flex[0]; /* 16 0 */ }; /* 0 16 */ }; /* 0 20 */ /* size: 20, cachelines: 1, members: 1 */ /* last cacheline: 20 bytes */ }; Also, refactor the code accordingly and make use of the struct_size() and flex_array_size() helpers. This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d1b327036ae4..193b7cf1f0ac 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ -- cgit v1.2.3 From 06447ae5e33bfbc5a777cc06d9854a31f3912833 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 14 Jul 2021 21:56:55 +0200 Subject: ioprio: move user space relevant ioprio bits to UAPI includes systemd added a modified copy of include/linux/ioprio.h into its code to get the relevant content definitions for the exposed ioprio_[get|set] system calls. Move the user space relevant ioprio bits to the UAPI includes to be able to use the ioprio_[get|set] syscalls as intended. Cc: Kay Sievers Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210714195655.181943-1-socketcan@hartkopp.net Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/uapi/linux/ioprio.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h new file mode 100644 index 000000000000..77b17e08b0da --- /dev/null +++ b/include/uapi/linux/ioprio.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IOPRIO_H +#define _UAPI_LINUX_IOPRIO_H + +/* + * Gives us 8 prio classes with 13-bits of data for each class + */ +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class, it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. IDLE is the idle scheduling class, it + * is only served when no one else is using the disk. + */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +/* + * Fallback BE priority + */ +#define IOPRIO_NORM (4) + +#endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From 7957d93bf32bc211415827e44fdd9cdf1388df59 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 13 Jul 2021 01:05:27 +0200 Subject: block: add ioctl to read the disk sequence number Add a new BLKGETDISKSEQ ioctl which retrieves the disk sequence number from the genhd structure. # ./getdiskseq /dev/loop* /dev/loop0: 13 /dev/loop0p1: 13 /dev/loop0p2: 13 /dev/loop0p3: 13 /dev/loop1: 14 /dev/loop1p1: 14 /dev/loop1p2: 14 /dev/loop2: 5 /dev/loop3: 6 Reviewed-by: Christoph Hellwig Signed-off-by: Matteo Croce Tested-by: Luca Boccassi Link: https://lore.kernel.org/r/20210712230530.29323-4-mcroce@linux.microsoft.com Signed-off-by: Jens Axboe --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 4c32e97dcdf0..bdf7b404b3e7 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -184,6 +184,7 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) /* * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) -- cgit v1.2.3 From 3a755cd8b7c601f756cbbf908b84f7cc8c04a02b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 2 Aug 2021 11:02:19 +0800 Subject: bonding: add new option lacp_active Add an option lacp_active, which is similar with team's runner.active. This option specifies whether to send LACPDU frames periodically. If set on, the LACPDU frames are sent along with the configured lacp_rate setting. If set off, the LACPDU frames acts as "speak when spoken to". Note, the LACPDU state frames still will be sent when init or unbind port. v2: remove module parameter Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 49b22afab78f..5310003523ce 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -855,6 +855,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From 018c14911dd7e2feedd96d440f12ea999e459fff Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 13 Jul 2021 19:50:21 +0200 Subject: scsi: target: tcmu: Add new feature KEEP_BUF When running command pipelining for WRITE direction commands (e.g. tape device write), userspace sends cmd completion to cmd ring before processing write data. In that case userspace has to copy data before sending completion, because cmd completion also implicitly releases the data buffer in data area. The new feature KEEP_BUF allows userspace to optionally keep the buffer after completion by setting new bit TCMU_UFLAG_KEEP_BUF in tcmu_cmd_entry_hdr->uflags. In that case buffer has to be released explicitly by writing the cmd_id to new action item free_kept_buf. All kept buffers are released during reset_ring and if userspace closes uio device (tcmu_release). Link: https://lore.kernel.org/r/20210713175021.20103-1-bostroesser@gmail.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- include/uapi/linux/target_core_user.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 95b1597f16ae..27ace512babd 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -46,6 +46,7 @@ #define TCMU_MAILBOX_FLAG_CAP_OOOC (1 << 0) /* Out-of-order completions */ #define TCMU_MAILBOX_FLAG_CAP_READ_LEN (1 << 1) /* Read data length */ #define TCMU_MAILBOX_FLAG_CAP_TMR (1 << 2) /* TMR notifications */ +#define TCMU_MAILBOX_FLAG_CAP_KEEP_BUF (1<<3) /* Keep buf after cmd completion */ struct tcmu_mailbox { __u16 version; @@ -75,6 +76,7 @@ struct tcmu_cmd_entry_hdr { __u8 kflags; #define TCMU_UFLAG_UNKNOWN_OP 0x1 #define TCMU_UFLAG_READ_LEN 0x2 +#define TCMU_UFLAG_KEEP_BUF 0x4 __u8 uflags; } __packed; -- cgit v1.2.3 From 5b9272e93f2efe3f6cda60cc2c26817b2ce49386 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 7 Jul 2021 11:48:54 +0200 Subject: can: j1939: extend UAPI to notify about RX status To be able to create applications with user friendly feedback, we need be able to provide receive status information. Typical ETP transfer may take seconds or even hours. To give user some clue or show a progress bar, the stack should push status updates. Same as for the TX information, the socket error queue will be used with following new signals: - J1939_EE_INFO_RX_RTS - received and accepted request to send signal. - J1939_EE_INFO_RX_DPO - received data package offset signal - J1939_EE_INFO_RX_ABORT - RX session was aborted Instead of completion signal, user will get data package. To activate this signals, application should set SOF_TIMESTAMPING_RX_SOFTWARE to the SO_TIMESTAMPING socket option. This will avoid unpredictable application behavior for the old software. Link: https://lore.kernel.org/r/20210707094854.30781-3-o.rempel@pengutronix.de Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/j1939.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index df6e821075c1..38936460f668 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -78,11 +78,20 @@ enum { enum { J1939_NLA_PAD, J1939_NLA_BYTES_ACKED, + J1939_NLA_TOTAL_SIZE, + J1939_NLA_PGN, + J1939_NLA_SRC_NAME, + J1939_NLA_DEST_NAME, + J1939_NLA_SRC_ADDR, + J1939_NLA_DEST_ADDR, }; enum { J1939_EE_INFO_NONE, J1939_EE_INFO_TX_ABORT, + J1939_EE_INFO_RX_RTS, + J1939_EE_INFO_RX_DPO, + J1939_EE_INFO_RX_ABORT, }; struct j1939_filter { -- cgit v1.2.3 From 04190bf8944deb7e3ac165a1a494db23aa0160a9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 4 Aug 2021 10:55:56 +0300 Subject: sock: allow reading and changing sk_userlocks with setsockopt SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags disable automatic socket buffers adjustment done by kernel (see tcp_fixup_rcvbuf() and tcp_sndbuf_expand()). If we've just created a new socket this adjustment is enabled on it, but if one changes the socket buffer size by setsockopt(SO_{SND,RCV}BUF*) it becomes disabled. CRIU needs to call setsockopt(SO_{SND,RCV}BUF*) on each socket on restore as it first needs to increase buffer sizes for packet queues restore and second it needs to restore back original buffer sizes. So after CRIU restore all sockets become non-auto-adjustable, which can decrease network performance of restored applications significantly. CRIU need to be able to restore sockets with enabled/disabled adjustment to the same state it was before dump, so let's add special setsockopt for it. Let's also export SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags to uAPI so that using these interface one can reenable automatic socket buffer adjustment on their sockets. Signed-off-by: Pavel Tikhomirov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/socket.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index d588c244ec2f..1f0a2b4864e4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index c3409c8ec0dd..eb0a9a5b6e71 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _UAPI_LINUX_SOCKET_H */ -- cgit v1.2.3 From 9d5adeecc409365e45cd89657f0392d0dd5c217f Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 22 Jun 2021 13:39:57 +0200 Subject: media: v4l2-ctrls: Add intra-refresh period control Add a control to set intra-refresh period. Acked-by: Hans Verkuil Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index fdf97a6d7d18..5532b5f68493 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -435,6 +435,7 @@ enum v4l2_mpeg_video_multi_slice_mode { #define V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX (V4L2_CID_CODEC_BASE+233) #define V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES (V4L2_CID_CODEC_BASE+234) #define V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR (V4L2_CID_CODEC_BASE+235) +#define V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD (V4L2_CID_CODEC_BASE+236) /* CIDs for the MPEG-2 Part 2 (H.262) codec */ #define V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL (V4L2_CID_CODEC_BASE+270) -- cgit v1.2.3 From 46abe13b5e3db187e52cd0de06c07bbce010726c Mon Sep 17 00:00:00 2001 From: Igor Skalkin Date: Tue, 3 Aug 2021 14:10:24 +0100 Subject: firmware: arm_scmi: Add virtio transport This transport enables communications with an SCMI platform through virtio; the SCMI platform will be represented by a virtio device. Implement an SCMI virtio driver according to the virtio SCMI device spec [1]. Virtio device id 32 has been reserved for the SCMI device [2]. The virtio transport has one Tx channel (virtio cmdq, A2P channel) and at most one Rx channel (virtio eventq, P2A channel). The following feature bit defined in [1] is not implemented: VIRTIO_SCMI_F_SHARED_MEMORY. The number of messages which can be pending simultaneously is restricted according to the virtqueue capacity negotiated at probing time. As soon as Rx channel message buffers are allocated or have been read out by the arm-scmi driver, feed them back to the virtio device. Since some virtio devices may not have the short response time exhibited by SCMI platforms using other transports, set a generous response timeout. SCMI polling mode is not supported by this virtio transport since deemed meaningless: polling mode operation is offered by the SCMI core to those transports that could not provide a completion interrupt on the TX path, which is never the case for virtio whose core callbacks can easily call into core scmi_rx_callback upon messages reception. [1] https://github.com/oasis-tcs/virtio-spec/blob/master/virtio-scmi.tex [2] https://www.oasis-open.org/committees/ballot.php?id=3496 Link: https://lore.kernel.org/r/20210803131024.40280-16-cristian.marussi@arm.com Cc: "Michael S. Tsirkin" Cc: Jason Wang Co-developed-by: Peter Hilber Co-developed-by: Cristian Marussi Signed-off-by: Igor Skalkin [ Peter: Adapted patch for submission to upstream. ] Signed-off-by: Peter Hilber [ Cristian: simplified driver logic, changed link_supplier and channel available/setup logic, removed dummy callbacks ] Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_scmi.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 include/uapi/linux/virtio_scmi.h (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..f74155f6882d 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -55,6 +55,7 @@ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_SCMI 32 /* virtio SCMI */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ /* diff --git a/include/uapi/linux/virtio_scmi.h b/include/uapi/linux/virtio_scmi.h new file mode 100644 index 000000000000..f8ddd04a3ace --- /dev/null +++ b/include/uapi/linux/virtio_scmi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2020-2021 OpenSynergy GmbH + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef _UAPI_LINUX_VIRTIO_SCMI_H +#define _UAPI_LINUX_VIRTIO_SCMI_H + +#include + +/* Device implements some SCMI notifications, or delayed responses. */ +#define VIRTIO_SCMI_F_P2A_CHANNELS 0 + +/* Device implements any SCMI statistics shared memory region */ +#define VIRTIO_SCMI_F_SHARED_MEMORY 1 + +/* Virtqueues */ + +#define VIRTIO_SCMI_VQ_TX 0 /* cmdq */ +#define VIRTIO_SCMI_VQ_RX 1 /* eventq */ +#define VIRTIO_SCMI_VQ_MAX_CNT 2 + +#endif /* _UAPI_LINUX_VIRTIO_SCMI_H */ -- cgit v1.2.3 From b65a9489730a2494f7a2a33a6eb0a12b8f1dd193 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Jul 2021 12:34:05 +0100 Subject: drm/i915/userptr: Probe existence of backing struct pages upon creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jason Ekstrand requested a more efficient method than userptr+set-domain to determine if the userptr object was backed by a complete set of pages upon creation. To be more efficient than simply populating the userptr using get_user_pages() (as done by the call to set-domain or execbuf), we can walk the tree of vm_area_struct and check for gaps or vma not backed by struct page (VM_PFNMAP). The question is how to handle VM_MIXEDMAP which may be either struct page or pfn backed... With discrete we are going to drop support for set_domain(), so offering a way to probe the pages, without having to resort to dummy batches has been requested. v2: - add new query param for the PROBE flag, so userspace can easily check if the kernel supports it(Jason). - use mmap_read_{lock, unlock}. - add some kernel-doc. v3: - In the docs also mention that PROBE doesn't guarantee that the pages will remain valid by the time they are actually used(Tvrtko). - Add a small comment for the hole finding logic(Jason). - Move the param next to all the other params which just return true. Testcase: igt/gem_userptr_blits/probe Signed-off-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Tvrtko Ursulin Acked-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Acked-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210723113405.427004-1-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7f13d241417f..bde5860b3686 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -683,6 +683,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -2231,12 +2234,29 @@ struct drm_i915_gem_userptr { * through the GTT. If the HW can't support readonly access, an error is * returned. * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * * I915_USERPTR_UNSYNCHRONIZED: * * NOT USED. Setting this flag will result in an error. */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** * @handle: Returned handle for the object. -- cgit v1.2.3 From db243b796439c0caba47865564d8acd18a301d18 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 4 Aug 2021 15:45:36 -0500 Subject: net/ipv4/ipv6: Replace one-element arraya with flexible-array members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Use an anonymous union with a couple of anonymous structs in order to keep userspace unchanged and refactor the related code accordingly: $ pahole -C group_filter net/ipv4/ip_sockglue.o struct group_filter { union { struct { __u32 gf_interface_aux; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct __kernel_sockaddr_storage gf_group_aux; /* 8 128 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u32 gf_fmode_aux; /* 136 4 */ __u32 gf_numsrc_aux; /* 140 4 */ struct __kernel_sockaddr_storage gf_slist[1]; /* 144 128 */ }; /* 0 272 */ struct { __u32 gf_interface; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct __kernel_sockaddr_storage gf_group; /* 8 128 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u32 gf_fmode; /* 136 4 */ __u32 gf_numsrc; /* 140 4 */ struct __kernel_sockaddr_storage gf_slist_flex[0]; /* 144 0 */ }; /* 0 144 */ }; /* 0 272 */ /* size: 272, cachelines: 5, members: 1 */ /* last cacheline: 16 bytes */ }; $ pahole -C compat_group_filter net/ipv4/ip_sockglue.o struct compat_group_filter { union { struct { __u32 gf_interface_aux; /* 0 4 */ struct __kernel_sockaddr_storage gf_group_aux __attribute__((__aligned__(4))); /* 4 128 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ __u32 gf_fmode_aux; /* 132 4 */ __u32 gf_numsrc_aux; /* 136 4 */ struct __kernel_sockaddr_storage gf_slist[1] __attribute__((__aligned__(4))); /* 140 128 */ } __attribute__((__packed__)) __attribute__((__aligned__(4))); /* 0 268 */ struct { __u32 gf_interface; /* 0 4 */ struct __kernel_sockaddr_storage gf_group __attribute__((__aligned__(4))); /* 4 128 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ __u32 gf_fmode; /* 132 4 */ __u32 gf_numsrc; /* 136 4 */ struct __kernel_sockaddr_storage gf_slist_flex[0] __attribute__((__aligned__(4))); /* 140 0 */ } __attribute__((__packed__)) __attribute__((__aligned__(4))); /* 0 140 */ } __attribute__((__aligned__(1))); /* 0 268 */ /* size: 268, cachelines: 5, members: 1 */ /* forced alignments: 1 */ /* last cacheline: 12 bytes */ } __attribute__((__packed__)); This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 193b7cf1f0ac..14168225cecd 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -222,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ -- cgit v1.2.3 From 9344988d2979ce9eefe136a69efcf692615ebba8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 30 Jul 2021 15:14:22 +0200 Subject: netfilter: ctnetlink: allow to filter dump by status bits If CTA_STATUS is present, but CTA_STATUS_MASK is not, then the mask is automatically set to 'status', so that kernel returns those entries that have all of the requested bits set. This makes more sense than using a all-one mask since we'd hardly ever find a match. There are no other checks for status bits, so if e.g. userspace sets impossible combinations it will get an empty dump. If kernel would reject unknown status bits, then a program that works on a future kernel that has IPS_FOO bit fails on old kernels. Same for 'impossible' combinations: Kernel never sets ASSURED without first having set SEEN_REPLY, but its possible that a future kernel could do so. Therefore no sanity tests other than a 0-mask. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..c6e6d7d7d538 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -56,6 +56,7 @@ enum ctattr_type { CTA_LABELS_MASK, CTA_SYNPROXY, CTA_FILTER, + CTA_STATUS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) -- cgit v1.2.3 From a43e2a0e11491b73e2acaa27ee74d6c3b86deac0 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 16 Jul 2021 22:46:21 -0400 Subject: drm/amdkfd: Allow querying SVM attributes that are clear Currently the SVM get_attr call allows querying, which flags are set in the entire address range. Add the opposite query, which flags are clear in the entire address range. Both queries can be combined in a single get_attr call, which allows answering questions such as, "is this address range coherent, non-coherent, or a mix of both"? Proposed userspace for UAPI: https://github.com/RadeonOpenCompute/ROCR-Runtime/tree/memory_model_queries Signed-off-by: Felix Kuehling Reviewed-by: Philip Yand Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 3cb5b5dd9f77..af96af174dc4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -31,9 +31,10 @@ * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API + * - 1.6 - Query clear flags in SVM get_attr API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 5 +#define KFD_IOCTL_MINOR_VERSION 6 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -575,18 +576,19 @@ struct kfd_ioctl_svm_attribute { * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC resepctively. For * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be - * aggregated by bitwise AND. The minimum migration granularity - * throughout the range will be returned for - * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * aggregated by bitwise AND. That means, a flag will be set in the + * output, if that flag is set for all pages in the range. For + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be + * aggregated by bitwise NOR. That means, a flag will be set in the + * output, if that flag is clear for all pages in the range. + * The minimum migration granularity throughout the range will be + * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY. * * Querying of accessibility attributes works by initializing the * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the * GPUID being queried. Multiple attributes can be given to allow * querying multiple GPUIDs. The ioctl function overwrites the * attribute type to indicate the access for the specified GPU. - * - * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for - * @KFD_IOCTL_SVM_OP_GET_ATTR. */ struct kfd_ioctl_svm_args { __u64 start_addr; -- cgit v1.2.3 From 57e203953d150e6304ab6936bd2d9aa2daa687f4 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:48 +0300 Subject: drm/tegra: Add new UAPI to header Update the tegra_drm.h UAPI header, adding the new proposed UAPI. The old staging UAPI is left in for now, with minor modification to avoid name collisions. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/uapi/drm/tegra_drm.h | 425 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 402 insertions(+), 23 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index c4df3c3668b3..94cfc306d50a 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -1,24 +1,5 @@ -/* - * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) 2012-2020 NVIDIA Corporation */ #ifndef _UAPI_TEGRA_DRM_H_ #define _UAPI_TEGRA_DRM_H_ @@ -29,6 +10,8 @@ extern "C" { #endif +/* Tegra DRM legacy UAPI. Only enabled with STAGING */ + #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) @@ -649,8 +632,8 @@ struct drm_tegra_gem_get_flags { #define DRM_TEGRA_SYNCPT_READ 0x02 #define DRM_TEGRA_SYNCPT_INCR 0x03 #define DRM_TEGRA_SYNCPT_WAIT 0x04 -#define DRM_TEGRA_OPEN_CHANNEL 0x05 -#define DRM_TEGRA_CLOSE_CHANNEL 0x06 +#define DRM_TEGRA_OPEN_CHANNEL 0x05 +#define DRM_TEGRA_CLOSE_CHANNEL 0x06 #define DRM_TEGRA_GET_SYNCPT 0x07 #define DRM_TEGRA_SUBMIT 0x08 #define DRM_TEGRA_GET_SYNCPT_BASE 0x09 @@ -674,6 +657,402 @@ struct drm_tegra_gem_get_flags { #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags) #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags) +/* New Tegra DRM UAPI */ + +/* + * Reported by the driver in the `capabilities` field. + * + * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent + * with regard to the system memory. + */ +#define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0) + +struct drm_tegra_channel_open { + /** + * @host1x_class: [in] + * + * Host1x class of the engine that will be programmed using this + * channel. + */ + __u32 host1x_class; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @context: [out] + * + * Opaque identifier corresponding to the opened channel. + */ + __u32 context; + + /** + * @version: [out] + * + * Version of the engine hardware. This can be used by userspace + * to determine how the engine needs to be programmed. + */ + __u32 version; + + /** + * @capabilities: [out] + * + * Flags describing the hardware capabilities. + */ + __u32 capabilities; + __u32 padding; +}; + +struct drm_tegra_channel_close { + /** + * @context: [in] + * + * Identifier of the channel to close. + */ + __u32 context; + __u32 padding; +}; + +/* + * Mapping flags that can be used to influence how the mapping is created. + * + * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access + * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access + */ +#define DRM_TEGRA_CHANNEL_MAP_READ (1 << 0) +#define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1) +#define DRM_TEGRA_CHANNEL_MAP_READ_WRITE (DRM_TEGRA_CHANNEL_MAP_READ | \ + DRM_TEGRA_CHANNEL_MAP_WRITE) + +struct drm_tegra_channel_map { + /** + * @context: [in] + * + * Identifier of the channel to which make memory available for. + */ + __u32 context; + + /** + * @handle: [in] + * + * GEM handle of the memory to map. + */ + __u32 handle; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @mapping: [out] + * + * Identifier corresponding to the mapping, to be used for + * relocations or unmapping later. + */ + __u32 mapping; +}; + +struct drm_tegra_channel_unmap { + /** + * @context: [in] + * + * Channel identifier of the channel to unmap memory from. + */ + __u32 context; + + /** + * @mapping: [in] + * + * Mapping identifier of the memory mapping to unmap. + */ + __u32 mapping; +}; + +/* Submission */ + +/** + * Specify that bit 39 of the patched-in address should be set to switch + * swizzling between Tegra and non-Tegra sector layout on systems that store + * surfaces in system memory in non-Tegra sector layout. + */ +#define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0) + +struct drm_tegra_submit_buf { + /** + * @mapping: [in] + * + * Identifier of the mapping to use in the submission. + */ + __u32 mapping; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * Information for relocation patching. + */ + struct { + /** + * @target_offset: [in] + * + * Offset from the start of the mapping of the data whose + * address is to be patched into the gather. + */ + __u64 target_offset; + + /** + * @gather_offset_words: [in] + * + * Offset in words from the start of the gather data to + * where the address should be patched into. + */ + __u32 gather_offset_words; + + /** + * @shift: [in] + * + * Number of bits the address should be shifted right before + * patching in. + */ + __u32 shift; + } reloc; +}; + +/** + * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr` + * buffer. Each GATHER_UPTR command uses successive words from the buffer. + */ +#define DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR 0 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT 1 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. The threshold is calculated relative to the start of the job. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE 2 + +struct drm_tegra_submit_cmd_gather_uptr { + __u32 words; + __u32 reserved[3]; +}; + +struct drm_tegra_submit_cmd_wait_syncpt { + __u32 id; + __u32 value; + __u32 reserved[2]; +}; + +struct drm_tegra_submit_cmd { + /** + * @type: [in] + * + * Command type to execute. One of the DRM_TEGRA_SUBMIT_CMD* + * defines. + */ + __u32 type; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + union { + struct drm_tegra_submit_cmd_gather_uptr gather_uptr; + struct drm_tegra_submit_cmd_wait_syncpt wait_syncpt; + __u32 reserved[4]; + }; +}; + +struct drm_tegra_submit_syncpt { + /** + * @id: [in] + * + * ID of the syncpoint that the job will increment. + */ + __u32 id; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @increments: [in] + * + * Number of times the job will increment this syncpoint. + */ + __u32 increments; + + /** + * @value: [out] + * + * Value the syncpoint will have once the job has completed all + * its specified syncpoint increments. + * + * Note that the kernel may increment the syncpoint before or after + * the job. These increments are not reflected in this field. + * + * If the job hangs or times out, not all of the increments may + * get executed. + */ + __u32 value; +}; + +struct drm_tegra_channel_submit { + /** + * @context: [in] + * + * Identifier of the channel to submit this job to. + */ + __u32 context; + + /** + * @num_bufs: [in] + * + * Number of elements in the `bufs_ptr` array. + */ + __u32 num_bufs; + + /** + * @num_cmds: [in] + * + * Number of elements in the `cmds_ptr` array. + */ + __u32 num_cmds; + + /** + * @gather_data_words: [in] + * + * Number of 32-bit words in the `gather_data_ptr` array. + */ + __u32 gather_data_words; + + /** + * @bufs_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_buf structures. + */ + __u64 bufs_ptr; + + /** + * @cmds_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_cmd structures. + */ + __u64 cmds_ptr; + + /** + * @gather_data_ptr: [in] + * + * Pointer to an array of Host1x opcodes to be used by GATHER_UPTR + * commands. + */ + __u64 gather_data_ptr; + + /** + * @syncobj_in: [in] + * + * Handle for DRM syncobj that will be waited before submission. + * Ignored if zero. + */ + __u32 syncobj_in; + + /** + * @syncobj_out: [in] + * + * Handle for DRM syncobj that will have its fence replaced with + * the job's completion fence. Ignored if zero. + */ + __u32 syncobj_out; + + /** + * @syncpt_incr: [in,out] + * + * Information about the syncpoint the job will increment. + */ + struct drm_tegra_submit_syncpt syncpt; +}; + +struct drm_tegra_syncpoint_allocate { + /** + * @id: [out] + * + * ID of allocated syncpoint. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_free { + /** + * @id: [in] + * + * ID of syncpoint to free. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_wait { + /** + * @timeout: [in] + * + * Absolute timestamp at which the wait will time out. + */ + __s64 timeout_ns; + + /** + * @id: [in] + * + * ID of syncpoint to wait on. + */ + __u32 id; + + /** + * @threshold: [in] + * + * Threshold to wait for. + */ + __u32 threshold; + + /** + * @value: [out] + * + * Value of the syncpoint upon wait completion. + */ + __u32 value; + + __u32 padding; +}; + +#define DRM_IOCTL_TEGRA_CHANNEL_OPEN DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open) +#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close) +#define DRM_IOCTL_TEGRA_CHANNEL_MAP DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map) +#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap) +#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit) + +#define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate) +#define DRM_IOCTL_TEGRA_SYNCPOINT_FREE DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free) +#define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait) + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 91ccbbac1747eea155632a1c6bb100052309b215 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:48:58 -0700 Subject: dm ima: measure data on table load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DM configures a block device with various target specific attributes passed to it as a table. DM loads the table, and calls each target’s respective constructors with the attributes as input parameters. Some of these attributes are critical to ensure the device meets certain security bar. Thus, IMA should measure these attributes, to ensure they are not tampered with, during the lifetime of the device. So that the external services can have high confidence in the configuration of the block-devices on a given system. Some devices may have large tables. And a given device may change its state (table-load, suspend, resume, rename, remove, table-clear etc.) many times. Measuring these attributes each time when the device changes its state will significantly increase the size of the IMA logs. Further, once configured, these attributes are not expected to change unless a new table is loaded, or a device is removed and recreated. Therefore the clear-text of the attributes should only be measured during table load, and the hash of the active/inactive table should be measured for the remaining device state changes. Export IMA function ima_measure_critical_data() to allow measurement of DM device parameters, as well as target specific attributes, during table load. Compute the hash of the inactive table and store it for measurements during future state change. If a load is called multiple times, update the inactive table hash with the hash of the latest populated table. So that the correct inactive table hash is measured when the device transitions to different states like resume, remove, rename, etc. Signed-off-by: Tushar Sugandhi Signed-off-by: Colin Ian King # leak fix Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index e5c6e458bdf7..c12ce30b52df 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -376,4 +376,10 @@ enum { */ #define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */ +/* + * If set, returns in the in buffer passed by UM, the raw table information + * that would be measured by IMA subsystem on device state change. + */ +#define DM_IMA_MEASUREMENT_FLAG (1 << 19) /* In */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3 From df271cd641f101decaa4f7c1dd5c62939900bd4c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:19 +0300 Subject: net: bridge: vlan: add support for mcast igmp/mld version global options Add support to change and retrieve global vlan IGMP/MLD versions. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5aca85874447..5188b9f6da28 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -549,6 +549,8 @@ enum { BRIDGE_VLANDB_GOPTS_ID, BRIDGE_VLANDB_GOPTS_RANGE, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 931ba87d2017f3869d656f3c705883549bfeb97f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:20 +0300 Subject: net: bridge: vlan: add support for mcast last member count global option Add support to change and retrieve global vlan multicast last member count option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5188b9f6da28..d7a150034376 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -551,6 +551,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 50725f6e6b217e7661ca696b7cc1f1b9aa7bda84 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:21 +0300 Subject: net: bridge: vlan: add support for mcast startup query count global option Add support to change and retrieve global vlan multicast startup query count option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index d7a150034376..082b413e1342 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -552,6 +552,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 77f6ababa299112092a264cac96bedf1a87015ef Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:22 +0300 Subject: net: bridge: vlan: add support for mcast last member interval global option Add support to change and retrieve global vlan multicast last member interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 082b413e1342..950ad175610e 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -553,6 +553,8 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, + BRIDGE_VLANDB_GOPTS_PAD, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 2da0aea21f1c40d003af6680551eaa5471103164 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:23 +0300 Subject: net: bridge: vlan: add support for mcast membership interval global option Add support to change and retrieve global vlan multicast membership interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 950ad175610e..93f1f16617c8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -555,6 +555,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, BRIDGE_VLANDB_GOPTS_PAD, + BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From cd9269d463107bc4a53a0965d90a57efeee9ae11 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:24 +0300 Subject: net: bridge: vlan: add support for mcast querier interval global option Add support to change and retrieve global vlan multicast querier interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 93f1f16617c8..fdc264c57009 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -556,6 +556,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, BRIDGE_VLANDB_GOPTS_PAD, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From d6c08aba4f29f606769939eb6156efceb7dbb790 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:25 +0300 Subject: net: bridge: vlan: add support for mcast query interval global option Add support to change and retrieve global vlan multicast query interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index fdc264c57009..1517aea738f4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -557,6 +557,7 @@ enum { BRIDGE_VLANDB_GOPTS_PAD, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 425214508b1bd3596edb31da8d9aedee30f2b4f5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:26 +0300 Subject: net: bridge: vlan: add support for mcast query response interval global option Add support to change and retrieve global vlan multicast query response interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 1517aea738f4..2627a657c3b3 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -558,6 +558,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 941121ee22a69935252473f03976f1f1200b9ae9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:27 +0300 Subject: net: bridge: vlan: add support for mcast startup query interval global option Add support to change and retrieve global vlan multicast startup query interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2627a657c3b3..b5d01538acd4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -559,6 +559,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 62938182c35906c0ed4beb7845b93b8ffb937597 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:30 +0300 Subject: net: bridge: vlan: add support for mcast querier global option Add support to change and retrieve global vlan multicast querier state. We just need to pass multicast context to br_multicast_set_querier instead of bridge device and the rest of the logic remains the same. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b5d01538acd4..03fd14a4e377 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -560,6 +560,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From a97df080b6a86c105f98052ca3a9d66149b461b3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:31 +0300 Subject: net: bridge: vlan: add support for mcast router global option Add support to change and retrieve global vlan multicast router state which is used for the bridge itself. We just need to pass multicast context to br_multicast_set_router instead of bridge device and the rest of the logic remains the same. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 03fd14a4e377..2104dd3557b4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -561,6 +561,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From dc002875c22b56c795ec24dc987ac2dd2081588e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:33 +0300 Subject: net: bridge: vlan: use br_rports_fill_info() to export mcast router ports Embed the standard multicast router port export by br_rports_fill_info() into a new global vlan attribute BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS. In order to have the same format for the global bridge mcast context and the per-vlan mcast context we need a double-nesting: - BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS - MDBA_ROUTER Currently we don't compare router lists, if any router port exists in the bridge mcast contexts we consider their option sets as different and export them separately. In addition we export the router port vlan id when dumping similar to the router port notification format. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2104dd3557b4..620d86e825b8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -562,6 +562,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 3d2a2544eae93987f0688c2d6ec06c76f9e1477b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 25 Jun 2021 11:17:16 +0300 Subject: nl80211: vendor-cmd: add Intel vendor commands for iwlmei usage iwlmei allows to integrate with the CSME firmware. There are flows that are prioprietary for this purpose: * Get the information of the AP the CSME firmware is connected to. This is useful when we need to speed up the connection process in case the CSME firmware has a TCP connection that must be kept alive across the ownership transition. * Forbid roaming, which will happen when the CSME firmware wants to tell the user space not disrupt the connection. * Request ownership, upon driver boot when the CSME firmware owns the device. This is a notification sent by the kernel. All those commands are expected to be used by any software managing the connection (mainly NetworkManager). Those commands are expected to be used only in case the CSME firmware owns the device and doesn't want to release the device unless the host made sure that it can keep the connectivity. Here are the steps of the expected flow: 1) The machine boots while AMT has an active TCP connection 2) iwlwifi starts and tries to access the device 3) The device is not available because of the active TCP connection. (If there are no active connections, the CSME firmware would have allowed iwlwifi to use the device) Note that all the steps up to here don't involve iwlmei. All this happens in iwlwifi (in iwl_pcie_prepare_card_hw). 4) iwlmei establishes a connection to the CSME firmware (through SAP) Here iwlwifi uses iwlmei to access the device's capabilities (since it can't touch the device), but this is not relevant for the vendor commands. 5) The CSME firmware tells iwlmei that it uses the NIC and that there is an acitve TCP connection, and hence, the host needs to think twice before asking the CSME firmware to release the device 6) iwlmei tells iwlwifi to report HW RFKILL with a special reason Up to here, there was no user space involved. 7) The user space (NetworkManager) boots and sees that the device is in RFKILL because the host doesn't own the device 8) The user space asks the kernel what AP the CSME firmware is connected to (with the first vendor command mentionned above) 9) The user space checks if it has a profile that matches the reply from the CSME firmware 10) The user space installs a network to the wpa_supplicant with a specific BSSID and a specific frequency 11) The user space prevents any type of full scan 12) The user space asks iwlmei to request ownership on the device (with the third vendor command) 13) iwlmei request ownership from the CSME firmware 14) The CSME firmware grants ownership 15) iwlmei tells iwlwifi to lift the RFKILL 16) RFKILL OFF is reported to userspace 17) The host boots the device, loads the firwmare, and connect to a specific BSSID without scanning including IP in less than 600ms (this is what I measured, of course it depends on many factors) 18) The host reports to the CSME firmware that there is a connection 19) The TCP connection is preserved and the host has now connectivity 20) Later, the TCP connection to the CSME firmware is terminated 21) The CSME firmware tells iwlmei that it is now free to do whatever it likes 22) iwlwifi sends the second vendor command to tell the user space that it can remove the special network configuration and pick any SSID / BSSID it likes. Co-Developed-by: Ayala Beker Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20210625081717.7680-4-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211-vnd-intel.h | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/uapi/linux/nl80211-vnd-intel.h (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h new file mode 100644 index 000000000000..0bf177b84fd9 --- /dev/null +++ b/include/uapi/linux/nl80211-vnd-intel.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2012-2014, 2018-2021 Intel Corporation + * Copyright (C) 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2016-2017 Intel Deutschland GmbH + */ +#ifndef __VENDOR_CMD_INTEL_H__ +#define __VENDOR_CMD_INTEL_H__ + +#define INTEL_OUI 0x001735 + +/** + * enum iwl_mvm_vendor_cmd - supported vendor commands + * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info. + * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device. + * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed. + * It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a + * &IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes. + */ + +enum iwl_mvm_vendor_cmd { + IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO = 0x2d, + IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP = 0x30, + IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT = 0x32, +}; + +enum iwl_vendor_auth_akm_mode { + IWL_VENDOR_AUTH_OPEN, + IWL_VENDOR_AUTH_RSNA = 0x6, + IWL_VENDOR_AUTH_RSNA_PSK, + IWL_VENDOR_AUTH_SAE = 0x9, + IWL_VENDOR_AUTH_MAX, +}; + +/** + * enum iwl_mvm_vendor_attr - attributes used in vendor commands + * @__IWL_MVM_VENDOR_ATTR_INVALID: attribute 0 is invalid + * @IWL_MVM_VENDOR_ATTR_VIF_ADDR: interface MAC address + * @IWL_MVM_VENDOR_ATTR_ADDR: MAC address + * @IWL_MVM_VENDOR_ATTR_SSID: SSID (binary attribute, 0..32 octets) + * @IWL_MVM_VENDOR_ATTR_STA_CIPHER: the cipher to use for the station with the + * mac address specified in &IWL_MVM_VENDOR_ATTR_ADDR. + * @IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN: u8 attribute. Indicates whether + * roaming is forbidden or not. Value 1 means roaming is forbidden, + * 0 mean roaming is allowed. + * @IWL_MVM_VENDOR_ATTR_AUTH_MODE: u32 attribute. Authentication mode type + * as specified in &enum iwl_vendor_auth_akm_mode. + * @IWL_MVM_VENDOR_ATTR_CHANNEL_NUM: u8 attribute. Contains channel number. + * @IWL_MVM_VENDOR_ATTR_BAND: u8 attribute. + * 0 for 2.4 GHz band, 1 for 5.2GHz band and 2 for 6GHz band. + * @IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL: u32 attribute. Channel number of + * collocated AP. Relevant for 6GHz AP info. + * @IWL_MVM_VENDOR_ATTR_COLLOC_ADDR: MAC address of a collocated AP. + * Relevant for 6GHz AP info. + * + * @NUM_IWL_MVM_VENDOR_ATTR: number of vendor attributes + * @MAX_IWL_MVM_VENDOR_ATTR: highest vendor attribute number + + */ +enum iwl_mvm_vendor_attr { + __IWL_MVM_VENDOR_ATTR_INVALID = 0x00, + IWL_MVM_VENDOR_ATTR_VIF_ADDR = 0x02, + IWL_MVM_VENDOR_ATTR_ADDR = 0x0a, + IWL_MVM_VENDOR_ATTR_SSID = 0x3d, + IWL_MVM_VENDOR_ATTR_STA_CIPHER = 0x51, + IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN = 0x64, + IWL_MVM_VENDOR_ATTR_AUTH_MODE = 0x65, + IWL_MVM_VENDOR_ATTR_CHANNEL_NUM = 0x66, + IWL_MVM_VENDOR_ATTR_BAND = 0x69, + IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL = 0x70, + IWL_MVM_VENDOR_ATTR_COLLOC_ADDR = 0x71, + + NUM_IWL_MVM_VENDOR_ATTR, + MAX_IWL_MVM_VENDOR_ATTR = NUM_IWL_MVM_VENDOR_ATTR - 1, +}; + +#endif /* __VENDOR_CMD_INTEL_H__ */ -- cgit v1.2.3 From 81be10934949da8b12ca4db3de1511a4220fa9b4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 13 Aug 2021 10:21:42 +0200 Subject: ALSA: pcm: Add SNDRV_PCM_INFO_EXPLICIT_SYNC flag ALSA PCM core has an optimized way to communicate with user-space for its control and status data via mmap on the supported architectures like x86. Depending on the situation, however, we'd rather want to enforce user-space notifying the applptr or hwptr change explicitly via ioctl. For example, the upcoming non-contig and non-coherent buffer handling would need an explicit sync, and this needs to catch the applptr and hwptr changes. Also, ASoC SOF driver will have the SPIB support that has the similar requirement for the explicit control of the applptr and hwptr. This patch adds the new PCM hardware info flag, SNDRV_PCM_INFO_EXPLICIT_SYNC. When this flag is set, PCM core disables both the control and the status mmap, which enforces user-space to update via SYNC_PTR ioctl. In that way, drivers can catch the applptr and hwptr update and apply the sync operation if needed. Link: https://lore.kernel.org/r/20210812113818.6479-1-tiwai@suse.de Link: https://lore.kernel.org/r/20210610205326.1176400-1-pierre-louis.bossart@linux.intel.com Link: https://lore.kernel.org/r/20210813082142.5375-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index d17c061950df..1d84ec9db93b 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -299,6 +299,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */ #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ +#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ -- cgit v1.2.3 From c7fa1d9b1fb179375e889ff076a1566ecc997bfc Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Aug 2021 18:00:00 +0300 Subject: net: bridge: mcast: dump ipv4 querier state Add support for dumping global IPv4 querier state, we dump the state only if our own querier is enabled or there has been another external querier which has won the election. For the bridge global state we use a new attribute IFLA_BR_MCAST_QUERIER_STATE and embed the state inside. The structure is: [IFLA_BR_MCAST_QUERIER_STATE] `[BRIDGE_QUERIER_IP_ADDRESS] - ip address of the querier `[BRIDGE_QUERIER_IP_PORT] - bridge port ifindex where the querier was seen (set only if external querier) `[BRIDGE_QUERIER_IP_OTHER_TIMER] - other querier timeout Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 10 ++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 620d86e825b8..e0fff67fcd88 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -770,4 +770,14 @@ struct br_boolopt_multi { __u32 optval; __u32 optmask; }; + +enum { + BRIDGE_QUERIER_UNSPEC, + BRIDGE_QUERIER_IP_ADDRESS, + BRIDGE_QUERIER_IP_PORT, + BRIDGE_QUERIER_IP_OTHER_TIMER, + BRIDGE_QUERIER_PAD, + __BRIDGE_QUERIER_MAX +}; +#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5310003523ce..8aad65b69054 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -479,6 +479,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 85b4108211742c5dd4f9f56c1d0704b4e0d4c98e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Aug 2021 18:00:01 +0300 Subject: net: bridge: mcast: dump ipv6 querier state Add support for dumping global IPv6 querier state, we dump the state only if our own querier is enabled or there has been another external querier which has won the election. For the bridge global state we use a new attribute IFLA_BR_MCAST_QUERIER_STATE and embed the state inside. The structure is: [IFLA_BR_MCAST_QUERIER_STATE] `[BRIDGE_QUERIER_IPV6_ADDRESS] - ip address of the querier `[BRIDGE_QUERIER_IPV6_PORT] - bridge port ifindex where the querier was seen (set only if external querier) `[BRIDGE_QUERIER_IPV6_OTHER_TIMER] - other querier timeout IPv4 and IPv6 attributes are embedded at the same level of IFLA_BR_MCAST_QUERIER_STATE. If we didn't dump anything we cancel the nest and return. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e0fff67fcd88..eceaad200bf6 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -777,6 +777,9 @@ enum { BRIDGE_QUERIER_IP_PORT, BRIDGE_QUERIER_IP_OTHER_TIMER, BRIDGE_QUERIER_PAD, + BRIDGE_QUERIER_IPV6_ADDRESS, + BRIDGE_QUERIER_IPV6_PORT, + BRIDGE_QUERIER_IPV6_OTHER_TIMER, __BRIDGE_QUERIER_MAX }; #define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) -- cgit v1.2.3 From ddc649d158c560c6685be1701900a6e456ecceac Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Aug 2021 18:00:02 +0300 Subject: net: bridge: vlan: dump mcast ctx querier state Use the new mcast querier state dump infrastructure and export vlans' mcast context querier state embedded in attribute BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index eceaad200bf6..f71a81fdbbc6 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -563,6 +563,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 6c9b40844751ea30c72f7a2f92f4d704bc6b2927 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Fri, 13 Aug 2021 20:08:02 +0800 Subject: net: Remove net/ipx.h and uapi/linux/ipx.h header files commit <47595e32869f> ("") indicated the ipx network layer as obsolete in Jan 2018, updated in the MAINTAINERS file now, after being exposed for 3 years to refactoring, so to delete uapi/linux/ipx.h and net/ipx.h header files for good. additionally, there is no module that depends on ipx.h except a broken staging driver(r8188eu) Signed-off-by: Cai Huoqing Signed-off-by: David S. Miller --- include/uapi/linux/ipx.h | 87 ------------------------------------------------ 1 file changed, 87 deletions(-) delete mode 100644 include/uapi/linux/ipx.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h deleted file mode 100644 index 3168137adae8..000000000000 --- a/include/uapi/linux/ipx.h +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IPX_H_ -#define _IPX_H_ -#include /* for compatibility with glibc netipx/ipx.h */ -#include -#include -#include -#define IPX_NODE_LEN 6 -#define IPX_MTU 576 - -#if __UAPI_DEF_SOCKADDR_IPX -struct sockaddr_ipx { - __kernel_sa_family_t sipx_family; - __be16 sipx_port; - __be32 sipx_network; - unsigned char sipx_node[IPX_NODE_LEN]; - __u8 sipx_type; - unsigned char sipx_zero; /* 16 byte fill */ -}; -#endif /* __UAPI_DEF_SOCKADDR_IPX */ - -/* - * So we can fit the extra info for SIOCSIFADDR into the address nicely - */ -#define sipx_special sipx_port -#define sipx_action sipx_zero -#define IPX_DLTITF 0 -#define IPX_CRTITF 1 - -#if __UAPI_DEF_IPX_ROUTE_DEFINITION -struct ipx_route_definition { - __be32 ipx_network; - __be32 ipx_router_network; - unsigned char ipx_router_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */ - -#if __UAPI_DEF_IPX_INTERFACE_DEFINITION -struct ipx_interface_definition { - __be32 ipx_network; - unsigned char ipx_device[16]; - unsigned char ipx_dlink_type; -#define IPX_FRAME_NONE 0 -#define IPX_FRAME_SNAP 1 -#define IPX_FRAME_8022 2 -#define IPX_FRAME_ETHERII 3 -#define IPX_FRAME_8023 4 -#define IPX_FRAME_TR_8022 5 /* obsolete */ - unsigned char ipx_special; -#define IPX_SPECIAL_NONE 0 -#define IPX_PRIMARY 1 -#define IPX_INTERNAL 2 - unsigned char ipx_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */ - -#if __UAPI_DEF_IPX_CONFIG_DATA -struct ipx_config_data { - unsigned char ipxcfg_auto_select_primary; - unsigned char ipxcfg_auto_create_interfaces; -}; -#endif /* __UAPI_DEF_IPX_CONFIG_DATA */ - -/* - * OLD Route Definition for backward compatibility. - */ - -#if __UAPI_DEF_IPX_ROUTE_DEF -struct ipx_route_def { - __be32 ipx_network; - __be32 ipx_router_network; -#define IPX_ROUTE_NO_ROUTER 0 - unsigned char ipx_router_node[IPX_NODE_LEN]; - unsigned char ipx_device[16]; - unsigned short ipx_flags; -#define IPX_RT_SNAP 8 -#define IPX_RT_8022 4 -#define IPX_RT_BLUEBOOK 2 -#define IPX_RT_ROUTED 1 -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEF */ - -#define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) -#define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1) -#define SIOCIPXCFGDATA (SIOCPROTOPRIVATE + 2) -#define SIOCIPXNCPCONN (SIOCPROTOPRIVATE + 3) -#endif /* _IPX_H_ */ -- cgit v1.2.3 From 9ea9b9c48387edc101d56349492ad9c0492ff78d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Aug 2021 15:23:08 +0200 Subject: remove the lightnvm subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lightnvm supports the OCSSD 1.x and 2.0 specs which were early attempts to produce Open Channel SSDs and never made it into the NVMe spec proper. They have since been superceeded by NVMe enhancements such as ZNS support. Remove the support per the deprecation schedule. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210812132308.38486-1-hch@lst.de Reviewed-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- include/uapi/linux/lightnvm.h | 224 ------------------------------------------ 1 file changed, 224 deletions(-) delete mode 100644 include/uapi/linux/lightnvm.h (limited to 'include/uapi') diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h deleted file mode 100644 index 2745afd9b8fa..000000000000 --- a/include/uapi/linux/lightnvm.h +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2015 CNEX Labs. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#ifndef _UAPI_LINUX_LIGHTNVM_H -#define _UAPI_LINUX_LIGHTNVM_H - -#ifdef __KERNEL__ -#include -#else /* __KERNEL__ */ -#include -#include -#define DISK_NAME_LEN 32 -#endif /* __KERNEL__ */ - -#include -#include - -#define NVM_TTYPE_NAME_MAX 48 -#define NVM_TTYPE_MAX 63 -#define NVM_MMTYPE_LEN 8 - -#define NVM_CTRL_FILE "/dev/lightnvm/control" - -struct nvm_ioctl_info_tgt { - __u32 version[3]; - __u32 reserved; - char tgtname[NVM_TTYPE_NAME_MAX]; -}; - -struct nvm_ioctl_info { - __u32 version[3]; /* in/out - major, minor, patch */ - __u16 tgtsize; /* number of targets */ - __u16 reserved16; /* pad to 4K page */ - __u32 reserved[12]; - struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX]; -}; - -enum { - NVM_DEVICE_ACTIVE = 1 << 0, -}; - -struct nvm_ioctl_device_info { - char devname[DISK_NAME_LEN]; - char bmname[NVM_TTYPE_NAME_MAX]; - __u32 bmversion[3]; - __u32 flags; - __u32 reserved[8]; -}; - -struct nvm_ioctl_get_devices { - __u32 nr_devices; - __u32 reserved[31]; - struct nvm_ioctl_device_info info[31]; -}; - -struct nvm_ioctl_create_simple { - __u32 lun_begin; - __u32 lun_end; -}; - -struct nvm_ioctl_create_extended { - __u16 lun_begin; - __u16 lun_end; - __u16 op; - __u16 rsv; -}; - -enum { - NVM_CONFIG_TYPE_SIMPLE = 0, - NVM_CONFIG_TYPE_EXTENDED = 1, -}; - -struct nvm_ioctl_create_conf { - __u32 type; - union { - struct nvm_ioctl_create_simple s; - struct nvm_ioctl_create_extended e; - }; -}; - -enum { - NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */ -}; - -struct nvm_ioctl_create { - char dev[DISK_NAME_LEN]; /* open-channel SSD device */ - char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */ - char tgtname[DISK_NAME_LEN]; /* dev to expose target as */ - - __u32 flags; - - struct nvm_ioctl_create_conf conf; -}; - -struct nvm_ioctl_remove { - char tgtname[DISK_NAME_LEN]; - - __u32 flags; -}; - -struct nvm_ioctl_dev_init { - char dev[DISK_NAME_LEN]; /* open-channel SSD device */ - char mmtype[NVM_MMTYPE_LEN]; /* register to media manager */ - - __u32 flags; -}; - -enum { - NVM_FACTORY_ERASE_ONLY_USER = 1 << 0, /* erase only blocks used as - * host blks or grown blks */ - NVM_FACTORY_RESET_HOST_BLKS = 1 << 1, /* remove host blk marks */ - NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2, /* remove grown blk marks */ - NVM_FACTORY_NR_BITS = 1 << 3, /* stops here */ -}; - -struct nvm_ioctl_dev_factory { - char dev[DISK_NAME_LEN]; - - __u32 flags; -}; - -struct nvm_user_vio { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nppas; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 ppa_list; - __u32 metadata_len; - __u32 data_len; - __u64 status; - __u32 result; - __u32 rsvd3[3]; -}; - -struct nvm_passthru_vio { - __u8 opcode; - __u8 flags; - __u8 rsvd[2]; - __u32 nsid; - __u32 cdw2; - __u32 cdw3; - __u64 metadata; - __u64 addr; - __u32 metadata_len; - __u32 data_len; - __u64 ppa_list; - __u16 nppas; - __u16 control; - __u32 cdw13; - __u32 cdw14; - __u32 cdw15; - __u64 status; - __u32 result; - __u32 timeout_ms; -}; - -/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ -enum { - /* top level cmds */ - NVM_INFO_CMD = 0x20, - NVM_GET_DEVICES_CMD, - - /* device level cmds */ - NVM_DEV_CREATE_CMD, - NVM_DEV_REMOVE_CMD, - - /* Init a device to support LightNVM media managers */ - NVM_DEV_INIT_CMD, - - /* Factory reset device */ - NVM_DEV_FACTORY_CMD, - - /* Vector user I/O */ - NVM_DEV_VIO_ADMIN_CMD = 0x41, - NVM_DEV_VIO_CMD = 0x42, - NVM_DEV_VIO_USER_CMD = 0x43, -}; - -#define NVM_IOCTL 'L' /* 0x4c */ - -#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \ - struct nvm_ioctl_info) -#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \ - struct nvm_ioctl_get_devices) -#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \ - struct nvm_ioctl_create) -#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \ - struct nvm_ioctl_remove) -#define NVM_DEV_INIT _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \ - struct nvm_ioctl_dev_init) -#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ - struct nvm_ioctl_dev_factory) - -#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \ - struct nvm_passthru_vio) -#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\ - struct nvm_passthru_vio) -#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\ - struct nvm_user_vio) - -#define NVM_VERSION_MAJOR 1 -#define NVM_VERSION_MINOR 0 -#define NVM_VERSION_PATCHLEVEL 0 - -#endif -- cgit v1.2.3 From 5b4ecc3d4c4aab8d002fe6358885c10e7b57e432 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Mon, 16 Aug 2021 10:15:27 +0800 Subject: ethtool: add two link extended substates of bad signal integrity ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST means the input external clock signal for SerDes is too weak or lost. ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS means the received signal for SerDes is too weak because analog loss of signal. Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 67aa7134b301..b6db6590baf0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -639,6 +639,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch { enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS, }; /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ -- cgit v1.2.3 From b89fbfbb854c9afc3047e8273cc3a694650b802e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 15 Aug 2021 00:05:57 -0700 Subject: bpf: Implement minimal BPF perf link Introduce a new type of BPF link - BPF perf link. This brings perf_event-based BPF program attachments (perf_event, tracepoints, kprobes, and uprobes) into the common BPF link infrastructure, allowing to list all active perf_event based attachments, auto-detaching BPF program from perf_event when link's FD is closed, get generic BPF link fdinfo/get_info functionality. BPF_LINK_CREATE command expects perf_event's FD as target_fd. No extra flags are currently supported. Force-detaching and atomic BPF program updates are not yet implemented, but with perf_event-based BPF links we now have common framework for this without the need to extend ioctl()-based perf_event interface. One interesting consideration is a new value for bpf_attach_type, which BPF_LINK_CREATE command expects. Generally, it's either 1-to-1 mapping from bpf_attach_type to bpf_prog_type, or many-to-1 mapping from a subset of bpf_attach_types to one bpf_prog_type (e.g., see BPF_PROG_TYPE_SK_SKB or BPF_PROG_TYPE_CGROUP_SOCK). In this case, though, we have three different program types (KPROBE, TRACEPOINT, PERF_EVENT) using the same perf_event-based mechanism, so it's many bpf_prog_types to one bpf_attach_type. I chose to define a single BPF_PERF_EVENT attach type for all of them and adjust link_create()'s logic for checking correspondence between attach type and program type. The alternative would be to define three new attach types (e.g., BPF_KPROBE, BPF_TRACEPOINT, and BPF_PERF_EVENT), but that seemed like unnecessary overkill and BPF_KPROBE will cause naming conflicts with BPF_KPROBE() macro, defined by libbpf. I chose to not do this to avoid unnecessary proliferation of bpf_attach_type enum values and not have to deal with naming conflicts. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/20210815070609.987780-5-andrii@kernel.org --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2db6925e04f4..94fe8329b28f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -993,6 +993,7 @@ enum bpf_attach_type { BPF_SK_SKB_VERDICT, BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, + BPF_PERF_EVENT, __MAX_BPF_ATTACH_TYPE }; @@ -1006,6 +1007,7 @@ enum bpf_link_type { BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, + BPF_LINK_TYPE_PERF_EVENT = 7, MAX_BPF_LINK_TYPE, }; -- cgit v1.2.3 From 82e6b1eee6a8875ef4eacfd60711cce6965c6b04 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 15 Aug 2021 00:05:58 -0700 Subject: bpf: Allow to specify user-provided bpf_cookie for BPF perf links Add ability for users to specify custom u64 value (bpf_cookie) when creating BPF link for perf_event-backed BPF programs (kprobe/uprobe, perf_event, tracepoints). This is useful for cases when the same BPF program is used for attaching and processing invocation of different tracepoints/kprobes/uprobes in a generic fashion, but such that each invocation is distinguished from each other (e.g., BPF program can look up additional information associated with a specific kernel function without having to rely on function IP lookups). This enables new use cases to be implemented simply and efficiently that previously were possible only through code generation (and thus multiple instances of almost identical BPF program) or compilation at runtime (BCC-style) on target hosts (even more expensive resource-wise). For uprobes it is not even possible in some cases to know function IP before hand (e.g., when attaching to shared library without PID filtering, in which case base load address is not known for a library). This is done by storing u64 bpf_cookie in struct bpf_prog_array_item, corresponding to each attached and run BPF program. Given cgroup BPF programs already use two 8-byte pointers for their needs and cgroup BPF programs don't have (yet?) support for bpf_cookie, reuse that space through union of cgroup_storage and new bpf_cookie field. Make it available to kprobe/tracepoint BPF programs through bpf_trace_run_ctx. This is set by BPF_PROG_RUN_ARRAY, used by kprobe/uprobe/tracepoint BPF program execution code, which luckily is now also split from BPF_PROG_RUN_ARRAY_CG. This run context will be utilized by a new BPF helper giving access to this user-provided cookie value from inside a BPF program. Generic perf_event BPF programs will access this value from perf_event itself through passed in BPF program context. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/20210815070609.987780-6-andrii@kernel.org --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 94fe8329b28f..63ee482d50e1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1448,6 +1448,13 @@ union bpf_attr { __aligned_u64 iter_info; /* extra bpf_iter_link_info */ __u32 iter_info_len; /* iter_info length */ }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; }; } link_create; -- cgit v1.2.3 From 7adfc6c9b315e174cf8743b21b7b691c8766791b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 15 Aug 2021 00:05:59 -0700 Subject: bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value Add new BPF helper, bpf_get_attach_cookie(), which can be used by BPF programs to get access to a user-provided bpf_cookie value, specified during BPF program attachment (BPF link creation) time. Naming is hard, though. With the concept being named "BPF cookie", I've considered calling the helper: - bpf_get_cookie() -- seems too unspecific and easily mistaken with socket cookie; - bpf_get_bpf_cookie() -- too much tautology; - bpf_get_link_cookie() -- would be ok, but while we create a BPF link to attach BPF program to BPF hook, it's still an "attachment" and the bpf_cookie is associated with BPF program attachment to a hook, not a BPF link itself. Technically, we could support bpf_cookie with old-style cgroup programs.So I ultimately rejected it in favor of bpf_get_attach_cookie(). Currently all perf_event-backed BPF program types support bpf_get_attach_cookie() helper. Follow-up patches will add support for fentry/fexit programs as well. While at it, mark bpf_tracing_func_proto() as static to make it obvious that it's only used from within the kernel/trace/bpf_trace.c. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210815070609.987780-7-andrii@kernel.org --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 63ee482d50e1..c4f7892edb2b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4856,6 +4856,21 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. + * + * u64 bpf_get_attach_cookie(void *ctx) + * Description + * Get bpf_cookie value provided (optionally) during the program + * attachment. It might be different for each individual + * attachment, even if BPF program itself is the same. + * Expects BPF program context *ctx* as a first argument. + * + * Supported for the following program types: + * - kprobe/uprobe; + * - tracepoint; + * - perf_event. + * Return + * Value specified by user at BPF link creation/attachment time + * or 0, if it was not specified. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5032,6 +5047,7 @@ union bpf_attr { FN(timer_start), \ FN(timer_cancel), \ FN(get_func_ip), \ + FN(get_attach_cookie), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 0d2ab3aea50bb02ff0c9c3d53c7b2b4b21cdd59d Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 2 Jul 2021 19:44:07 +0200 Subject: nl80211: add support for BSS coloring This patch adds support for BSS color collisions to the wireless subsystem. Add the required functionality to nl80211 that will notify about color collisions, triggering the color change and notifying when it is completed. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: John Crispin Link: https://lore.kernel.org/r/500b3582aec8fe2c42ef46f3117b148cb7cbceb5.1625247619.git.lorenzo@kernel.org [remove unnecessary NULL initialisation] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index db474994fa73..c2efea98e060 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1185,6 +1185,21 @@ * passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to * specify the wiphy index to be applied to. * + * @NL80211_CMD_OBSS_COLOR_COLLISION: This notification is sent out whenever + * mac80211/drv detects a bss color collision. + * + * @NL80211_CMD_COLOR_CHANGE_REQUEST: This command is used to indicate that + * userspace wants to change the BSS color. + * + * @NL80211_CMD_COLOR_CHANGE_STARTED: Notify userland, that a color change has + * started + * + * @NL80211_CMD_COLOR_CHANGE_ABORTED: Notify userland, that the color change has + * been aborted + * + * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change + * has completed + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1417,6 +1432,14 @@ enum nl80211_commands { NL80211_CMD_SET_SAR_SPECS, + NL80211_CMD_OBSS_COLOR_COLLISION, + + NL80211_CMD_COLOR_CHANGE_REQUEST, + + NL80211_CMD_COLOR_CHANGE_STARTED, + NL80211_CMD_COLOR_CHANGE_ABORTED, + NL80211_CMD_COLOR_CHANGE_COMPLETED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2560,6 +2583,16 @@ enum nl80211_commands { * disassoc events to indicate that an immediate reconnect to the AP * is desired. * + * @NL80211_ATTR_OBSS_COLOR_BITMAP: bitmap of the u64 BSS colors for the + * %NL80211_CMD_OBSS_COLOR_COLLISION event. + * + * @NL80211_ATTR_COLOR_CHANGE_COUNT: u8 attribute specifying the number of TBTT's + * until the color switch event. + * @NL80211_ATTR_COLOR_CHANGE_COLOR: u8 attribute specifying the color that we are + * switching to + * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE + * information for the time while performing a color switch. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3057,6 +3090,12 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_HE, + NL80211_ATTR_OBSS_COLOR_BITMAP, + + NL80211_ATTR_COLOR_CHANGE_COUNT, + NL80211_ATTR_COLOR_CHANGE_COLOR, + NL80211_ATTR_COLOR_CHANGE_ELEMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5953,6 +5992,9 @@ enum nl80211_feature_flags { * frame protection for all management frames exchanged during the * negotiation and range measurement procedure. * + * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision + * detection and change announcemnts. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6017,6 +6059,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SECURE_LTF, NL80211_EXT_FEATURE_SECURE_RTT, NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, + NL80211_EXT_FEATURE_BSS_COLOR, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From ea49dc79002c416a9003f3204bc14f846a0dbcae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Jul 2021 08:56:09 +1000 Subject: NFSD: remove vanity comments Including one's name in copyright claims is appropriate. Including it in random comments is just vanity. After 2 decades, it is time for these to be gone. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd/nfsfh.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index 427294dd56a1..e29e8accc4f4 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -33,7 +33,6 @@ struct nfs_fhbase_old { /* * This is the new flexible, extensible style NFSv2/v3/v4 file handle. - * by Neil Brown - March 2000 * * The file handle starts with a sequence of four-byte words. * The first word contains a version number (1) and three descriptor bytes -- cgit v1.2.3 From 2843ff6f36db7074e17bf5d637a14da08c54aed8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 17 Aug 2021 15:07:23 -0700 Subject: mptcp: remote addresses fullmesh This patch added and managed a new per endpoint flag, named MPTCP_PM_ADDR_FLAG_FULLMESH. In mptcp_pm_create_subflow_or_signal_addr(), if such flag is set, instead of: remote_address((struct sock_common *)sk, &remote); fill a temporary allocated array of all known remote address. After releaseing the pm lock loop on such array and create a subflow for each remote address from the given local. Note that the we could still use an array even for non 'fullmesh' endpoint: with a single entry corresponding to the primary MPC subflow remote address. Suggested-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 7b05f7102321..f66038b9551f 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -73,6 +73,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) enum { MPTCP_PM_CMD_UNSPEC, -- cgit v1.2.3 From 25bca50e523cbe96c0207fbb92f22ff2bc28e9aa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:36:58 +0900 Subject: block: improve ioprio class description comment In include/usapi/linux/ioprio.h, change the ioprio class enum comment to remove the outdated reference to CFQ and mention BFQ and mq-deadline instead. Also document the high priority NCQ command use for RT class IOs directed at ATA drives that support NCQ priority. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-3-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 77b17e08b0da..6b735854aebd 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -13,10 +13,12 @@ #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) /* - * These are the io priority groups as implemented by CFQ. RT is the realtime - * class, it always gets premium service. BE is the best-effort scheduling - * class, the default for any process. IDLE is the idle scheduling class, it - * is only served when no one else is using the disk. + * These are the io priority groups as implemented by the BFQ and mq-deadline + * schedulers. RT is the realtime class, it always gets premium service. For + * ATA disks supporting NCQ IO priority, RT class IOs will be processed using + * high priority NCQ commands. BE is the best-effort scheduling class, the + * default for any process. IDLE is the idle scheduling class, it is only + * served when no one else is using the disk. */ enum { IOPRIO_CLASS_NONE, -- cgit v1.2.3 From a553a835ca57668b0d9907d8ec2507ec51292d9a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:36:59 +0900 Subject: block: change ioprio_valid() to an inline function Change the ioprio_valid() macro in include/usapi/linux/ioprio.h to an inline function declared on the kernel side in include/linux/ioprio.h. Also improve checks on the class value by checking the upper bound value. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-4-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 6b735854aebd..5064e230374c 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -27,8 +27,6 @@ enum { IOPRIO_CLASS_IDLE, }; -#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) - /* * 8 best effort priority levels are supported */ -- cgit v1.2.3 From ba05200fcce0a73fa8db16c514fbaa476d1d9399 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:00 +0900 Subject: block: fix IOPRIO_PRIO_CLASS() and IOPRIO_PRIO_VALUE() macros The ki_ioprio field of struct kiocb is 16-bits (u16) but often handled as an int in the block layer. E.g. ioprio_check_cap() takes an int as argument. With such implicit int casting function calls, the upper 16-bits of the int argument may be left uninitialized by the compiler, resulting in invalid values for the IOPRIO_PRIO_CLASS() macro (garbage upper bits) and in an error return for functions such as ioprio_check_cap(). Fix this by masking the result of the shift by IOPRIO_CLASS_SHIFT bits in the IOPRIO_PRIO_CLASS() macro. The new macro IOPRIO_CLASS_MASK defines the 3-bits mask for the priority class. Similarly, apply the IOPRIO_PRIO_MASK mask to the data argument of the IOPRIO_PRIO_VALUE() macro to ignore the upper bits of the data value. The IOPRIO_CLASS_MASK mask is also applied to the class argument of this macro before shifting the result by IOPRIO_CLASS_SHIFT bits. While at it, also change the argument name of the IOPRIO_PRIO_CLASS() and IOPRIO_PRIO_DATA() macros from "mask" to "ioprio" to reflect the fact that a priority value should be passed rather than a mask. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-5-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 5064e230374c..936f0d8f30e1 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -5,12 +5,16 @@ /* * Gives us 8 prio classes with 13-bits of data for each class */ -#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_CLASS_SHIFT 13 +#define IOPRIO_CLASS_MASK 0x07 #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) -#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) -#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) -#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) +#define IOPRIO_PRIO_CLASS(ioprio) \ + (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK) +#define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) \ + ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ + ((data) & IOPRIO_PRIO_MASK)) /* * These are the io priority groups as implemented by the BFQ and mq-deadline -- cgit v1.2.3 From 202bc942c5cd4340d37b06c4e0b8b03f9925d818 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:01 +0900 Subject: block: Introduce IOPRIO_NR_LEVELS The BFQ scheduler and ioprio_check_cap() both assume that the RT priority class (IOPRIO_CLASS_RT) can have up to 8 different priority levels, similarly to the BE class (IOPRIO_CLASS_iBE). This is controlled using the IOPRIO_BE_NR macro , which is badly named as the number of levels also applies to the RT class. Introduce the class independent IOPRIO_NR_LEVELS macro, defined to 8, to make things clear. Keep the old IOPRIO_BE_NR macro definition as an alias for IOPRIO_NR_LEVELS. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-6-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 936f0d8f30e1..aac39338d02c 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -32,9 +32,10 @@ enum { }; /* - * 8 best effort priority levels are supported + * The RT and BE priority classes both support up to 8 priority levels. */ -#define IOPRIO_BE_NR (8) +#define IOPRIO_NR_LEVELS 8 +#define IOPRIO_BE_NR IOPRIO_NR_LEVELS enum { IOPRIO_WHO_PROCESS = 1, -- cgit v1.2.3 From e70344c05995a190a56bbd1a23dc2218bcc8c924 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:02 +0900 Subject: block: fix default IO priority handling The default IO priority is the best effort (BE) class with the normal priority level IOPRIO_NORM (4). However, get_task_ioprio() returns IOPRIO_CLASS_NONE/IOPRIO_NORM as the default priority and get_current_ioprio() returns IOPRIO_CLASS_NONE/0. Let's be consistent with the defined default and have both of these functions return the default priority IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM) when the user did not define another default IO priority for the task. In include/uapi/linux/ioprio.h, introduce the IOPRIO_BE_NORM macro as an alias to IOPRIO_NORM to clarify that this default level applies to the BE priotity class. In include/linux/ioprio.h, define the macro IOPRIO_DEFAULT as IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) and use this new macro when setting a priority to the default. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-7-damien.lemoal@wdc.com [axboe: drop unnecessary lightnvm change] Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index aac39338d02c..f70f2596a6bf 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -44,8 +44,9 @@ enum { }; /* - * Fallback BE priority + * Fallback BE priority level. */ -#define IOPRIO_NORM (4) +#define IOPRIO_NORM 4 +#define IOPRIO_BE_NORM IOPRIO_NORM #endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From 3cfc88380413d20f777dc6648a38f683962e52bf Mon Sep 17 00:00:00 2001 From: Jie Deng Date: Fri, 23 Jul 2021 13:44:35 +0800 Subject: i2c: virtio: add a virtio i2c frontend driver Add an I2C bus driver for virtio para-virtualization. The controller can be emulated by the backend driver in any device model software by following the virtio protocol. The device specification can be found on https://lists.oasis-open.org/archives/virtio-comment/202101/msg00008.html. By following the specification, people may implement different backend drivers to emulate different controllers according to their needs. Co-developed-by: Conghui Chen Signed-off-by: Conghui Chen Signed-off-by: Jie Deng Reviewed-by: Viresh Kumar Tested-by: Viresh Kumar Acked-by: Michael S. Tsirkin Signed-off-by: Wolfram Sang --- include/uapi/linux/virtio_i2c.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 42 insertions(+) create mode 100644 include/uapi/linux/virtio_i2c.h (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h new file mode 100644 index 000000000000..7c6a6fc01ad6 --- /dev/null +++ b/include/uapi/linux/virtio_i2c.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ +/* + * Definitions for virtio I2C Adpter + * + * Copyright (c) 2021 Intel Corporation. All rights reserved. + */ + +#ifndef _UAPI_LINUX_VIRTIO_I2C_H +#define _UAPI_LINUX_VIRTIO_I2C_H + +#include +#include + +/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ +#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) + +/** + * struct virtio_i2c_out_hdr - the virtio I2C message OUT header + * @addr: the controlled device address + * @padding: used to pad to full dword + * @flags: used for feature extensibility + */ +struct virtio_i2c_out_hdr { + __le16 addr; + __le16 padding; + __le32 flags; +}; + +/** + * struct virtio_i2c_in_hdr - the virtio I2C message IN header + * @status: the processing result from the backend + */ +struct virtio_i2c_in_hdr { + __u8 status; +}; + +/* The final status written by the device */ +#define VIRTIO_I2C_MSG_OK 0 +#define VIRTIO_I2C_MSG_ERR 1 + +#endif /* _UAPI_LINUX_VIRTIO_I2C_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..99aa27b100bc 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -55,6 +55,7 @@ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ /* -- cgit v1.2.3 From 2796d846d74a18cc6563e96eff8bf28c5e06f912 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Aug 2021 15:42:55 +0300 Subject: net: bridge: vlan: convert mcast router global option to per-vlan entry The per-vlan router option controls the port/vlan and host vlan entries' mcast router config. The global option controlled only the host vlan config, but that is unnecessary and incosistent as it's not really a global vlan option, but rather bridge option to control host router config, so convert BRIDGE_VLANDB_GOPTS_MCAST_ROUTER to BRIDGE_VLANDB_ENTRY_MCAST_ROUTER which can be used to control both host vlan and port vlan mcast router config. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f71a81fdbbc6..2711c3522010 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -506,6 +506,7 @@ enum { BRIDGE_VLANDB_ENTRY_STATE, BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, BRIDGE_VLANDB_ENTRY_STATS, + BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) @@ -561,7 +562,6 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, - BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, __BRIDGE_VLANDB_GOPTS_MAX -- cgit v1.2.3 From f95937ccf5bd5e0a6bbac2b8e65a87982ffae403 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:29 +0000 Subject: KVM: stats: Support linear and logarithmic histogram statistics Add new types of KVM stats, linear and logarithmic histogram. Histogram are very useful for observing the value distribution of time or size related stats. Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-2-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d9e4aabcb31a..a067410ebea5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1965,7 +1965,9 @@ struct kvm_stats_header { #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) -#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK +#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST #define KVM_STATS_UNIT_SHIFT 4 #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) @@ -1988,8 +1990,9 @@ struct kvm_stats_header { * @size: The number of data items for this stats. * Every data item is of type __u64. * @offset: The offset of the stats to the start of stat structure in - * struture kvm or kvm_vcpu. - * @unused: Unused field for future usage. Always 0 for now. + * structure kvm or kvm_vcpu. + * @bucket_size: A parameter value used for histogram stats. It is only used + * for linear histogram stats, specifying the size of the bucket; * @name: The name string for the stats. Its size is indicated by the * &kvm_stats_header->name_size. */ @@ -1998,7 +2001,7 @@ struct kvm_stats_desc { __s16 exponent; __u16 size; __u32 offset; - __u32 unused; + __u32 bucket_size; char name[]; }; -- cgit v1.2.3 From 3a29355a22c0275fe864100794fee58a73175d93 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Aug 2021 10:00:22 +0530 Subject: gpio: Add virtio-gpio driver This patch adds a new driver for Virtio based GPIO devices. This allows a guest VM running Linux to access GPIO lines provided by the host. It supports all basic operations, except interrupts for the GPIO lines. Based on the initial work posted by: "Enrico Weigelt, metux IT consult" . Reviewed-by: Linus Walleij Signed-off-by: Viresh Kumar Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/virtio_gpio.h | 47 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 48 insertions(+) create mode 100644 include/uapi/linux/virtio_gpio.h (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h new file mode 100644 index 000000000000..844574acf095 --- /dev/null +++ b/include/uapi/linux/virtio_gpio.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _LINUX_VIRTIO_GPIO_H +#define _LINUX_VIRTIO_GPIO_H + +#include + +/* Virtio GPIO request types */ +#define VIRTIO_GPIO_MSG_GET_NAMES 0x0001 +#define VIRTIO_GPIO_MSG_GET_DIRECTION 0x0002 +#define VIRTIO_GPIO_MSG_SET_DIRECTION 0x0003 +#define VIRTIO_GPIO_MSG_GET_VALUE 0x0004 +#define VIRTIO_GPIO_MSG_SET_VALUE 0x0005 + +/* Possible values of the status field */ +#define VIRTIO_GPIO_STATUS_OK 0x0 +#define VIRTIO_GPIO_STATUS_ERR 0x1 + +/* Direction types */ +#define VIRTIO_GPIO_DIRECTION_NONE 0x00 +#define VIRTIO_GPIO_DIRECTION_OUT 0x01 +#define VIRTIO_GPIO_DIRECTION_IN 0x02 + +struct virtio_gpio_config { + __u16 ngpio; + __u8 padding[2]; + __u32 gpio_names_size; +} __packed; + +/* Virtio GPIO Request / Response */ +struct virtio_gpio_request { + __u16 type; + __u16 gpio; + __u32 value; +}; + +struct virtio_gpio_response { + __u8 status; + __u8 value; +}; + +struct virtio_gpio_response_get_names { + __u8 status; + __u8 value[]; +}; + +#endif /* _LINUX_VIRTIO_GPIO_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..e04fa2bfc0eb 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -56,6 +56,7 @@ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ +#define VIRTIO_ID_GPIO 41 /* virtio gpio */ /* * Virtio Transitional IDs -- cgit v1.2.3 From 146054090b0859b28fc39015c7704ccc3c3a347f Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 30 Jun 2021 13:01:49 -0700 Subject: btrfs: initial fsverity support Add support for fsverity in btrfs. To support the generic interface in fs/verity, we add two new item types in the fs tree for inodes with verity enabled. One stores the per-file verity descriptor and btrfs verity item and the other stores the Merkle tree data itself. Verity checking is done in end_page_read just before a page is marked uptodate. This naturally handles a variety of edge cases like holes, preallocated extents, and inline extents. Some care needs to be taken to not try to verity pages past the end of the file, which are accessed by the generic buffered file reading code under some circumstances like reading to the end of the last page and trying to read again. Direct IO on a verity file falls back to buffered reads. Verity relies on PageChecked for the Merkle tree data itself to avoid re-walking up shared paths in the tree. For this reason, we need to cache the Merkle tree data. Since the file is immutable after verity is turned on, we can cache it at an index past EOF. Use the new inode ro_flags to store verity on the inode item, so that we can enable verity on a file, then rollback to an older kernel and still mount the file system and read the file. Since we can't safely write the file anymore without ruining the invariants of the Merkle tree, we mark a ro_compat flag on the file system when a file has verity enabled. Acked-by: Eric Biggers Co-developed-by: Chris Mason Signed-off-by: Chris Mason Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 22cd037123fa..d7d3cfead056 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -288,6 +288,7 @@ struct btrfs_ioctl_fs_info_args { * first mount when booting older kernel versions. */ #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1) +#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2) #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index ccdb40fe40dc..e1c4c732aaba 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -118,6 +118,29 @@ #define BTRFS_INODE_REF_KEY 12 #define BTRFS_INODE_EXTREF_KEY 13 #define BTRFS_XATTR_ITEM_KEY 24 + +/* + * fs verity items are stored under two different key types on disk. + * The descriptor items: + * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ] + * + * At offset 0, we store a btrfs_verity_descriptor_item which tracks the size + * of the descriptor item and some extra data for encryption. + * Starting at offset 1, these hold the generic fs verity descriptor. The + * latter are opaque to btrfs, we just read and write them as a blob for the + * higher level verity code. The most common descriptor size is 256 bytes. + * + * The merkle tree items: + * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ] + * + * These also start at offset 0, and correspond to the merkle tree bytes. When + * fsverity asks for page 0 of the merkle tree, we pull up one page starting at + * offset 0 for this key type. These are also opaque to btrfs, we're blindly + * storing whatever fsverity sends down. + */ +#define BTRFS_VERITY_DESC_ITEM_KEY 36 +#define BTRFS_VERITY_MERKLE_ITEM_KEY 37 + #define BTRFS_ORPHAN_ITEM_KEY 48 /* reserve 2-15 close to the inode for later flexibility */ @@ -991,4 +1014,16 @@ struct btrfs_qgroup_limit_item { __le64 rsv_excl; } __attribute__ ((__packed__)); +struct btrfs_verity_descriptor_item { + /* Size of the verity descriptor in bytes */ + __le64 size; + /* + * When we implement support for fscrypt, we will need to encrypt the + * Merkle tree for encrypted verity files. These 128 bits are for the + * eventual storage of an fscrypt initialization vector. + */ + __le64 reserved[2]; + __u8 encryption; +} __attribute__ ((__packed__)); + #endif /* _BTRFS_CTREE_H_ */ -- cgit v1.2.3 From a4ad42d28618eef83bee02e0a19af0d467bd9722 Mon Sep 17 00:00:00 2001 From: Kareem Shaik Date: Wed, 18 Aug 2021 09:57:39 +0200 Subject: ASoC: Intel: Skylake: Support multiple format configs A module can have two kinds of set params, as per topology requirements. For example, one pre-init and one post-init. But currently, there is support for just one type, as the format_config. This patch extends the format_configs to 4, so as to be able to support pre-init, post-init and post-bind type of set params, for the same module, simultaneously. Signed-off-by: Kareem Shaik Signed-off-by: Gustaw Lewandowski Signed-off-by: Cezary Rojewski Tested-by: Lukasz Majczak Link: https://lore.kernel.org/r/20210818075742.1515155-9-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/uapi/sound/snd_sst_tokens.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/snd_sst_tokens.h b/include/uapi/sound/snd_sst_tokens.h index 8ba0112e5336..ff3748e9308a 100644 --- a/include/uapi/sound/snd_sst_tokens.h +++ b/include/uapi/sound/snd_sst_tokens.h @@ -233,6 +233,8 @@ * * %SKL_TKN_U32_ASTATE_CLK_SRC: Clock source for A-State entry * + * %SKL_TKN_U32_FMT_CFG_IDX: Format config index + * * module_id and loadable flags dont have tokens as these values will be * read from the DSP FW manifest * @@ -324,7 +326,9 @@ enum SKL_TKNS { SKL_TKN_U32_ASTATE_COUNT, SKL_TKN_U32_ASTATE_KCPS, SKL_TKN_U32_ASTATE_CLK_SRC, - SKL_TKN_MAX = SKL_TKN_U32_ASTATE_CLK_SRC, + + SKL_TKN_U32_FMT_CFG_IDX = 96, + SKL_TKN_MAX = SKL_TKN_U32_FMT_CFG_IDX, }; #endif -- cgit v1.2.3 From e34a02dc40c95d126bb6486dcf802bbb8d1624a0 Mon Sep 17 00:00:00 2001 From: Dmitry Kadashev Date: Thu, 8 Jul 2021 13:34:45 +0700 Subject: io_uring: add support for IORING_OP_MKDIRAT IORING_OP_MKDIRAT behaves like mkdirat(2) and takes the same flags and arguments. Acked-by: Linus Torvalds Signed-off-by: Dmitry Kadashev Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20210708063447.3556403-10-dkadashev@gmail.com [axboe: add splice_fd_in check] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 79126d5cd289..a926407c230e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -133,6 +133,7 @@ enum { IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, + IORING_OP_MKDIRAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 7a8721f84fcb3b2946a92380b6fc311e017ff02c Mon Sep 17 00:00:00 2001 From: Dmitry Kadashev Date: Thu, 8 Jul 2021 13:34:46 +0700 Subject: io_uring: add support for IORING_OP_SYMLINKAT IORING_OP_SYMLINKAT behaves like symlinkat(2) and takes the same flags and arguments. Acked-by: Linus Torvalds Suggested-by: Christian Brauner Link: https://lore.kernel.org/io-uring/20210514145259.wtl4xcsp52woi6ab@wittgenstein/ Signed-off-by: Dmitry Kadashev Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20210708063447.3556403-11-dkadashev@gmail.com [axboe: add splice_fd_in check] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a926407c230e..61fd347ab176 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -134,6 +134,7 @@ enum { IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, + IORING_OP_SYMLINKAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From cf30da90bc3a26911d369f199411f38b701394de Mon Sep 17 00:00:00 2001 From: Dmitry Kadashev Date: Thu, 8 Jul 2021 13:34:47 +0700 Subject: io_uring: add support for IORING_OP_LINKAT IORING_OP_LINKAT behaves like linkat(2) and takes the same flags and arguments. In some internal places 'hardlink' is used instead of 'link' to avoid confusion with the SQE links. Name 'link' conflicts with the existing 'link' member of io_kiocb. Acked-by: Linus Torvalds Suggested-by: Christian Brauner Link: https://lore.kernel.org/io-uring/20210514145259.wtl4xcsp52woi6ab@wittgenstein/ Signed-off-by: Dmitry Kadashev Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20210708063447.3556403-12-dkadashev@gmail.com [axboe: add splice_fd_in check] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 61fd347ab176..10eb38d2864f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -44,6 +44,7 @@ struct io_uring_sqe { __u32 splice_flags; __u32 rename_flags; __u32 unlink_flags; + __u32 hardlink_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -135,6 +136,7 @@ enum { IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, + IORING_OP_LINKAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 6fc88c354f3af83ffa2c285b86e76c759755693f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 19 Aug 2021 02:24:20 -0700 Subject: bpf: Migrate cgroup_bpf to internal cgroup_bpf_attach_type enum Add an enum (cgroup_bpf_attach_type) containing only valid cgroup_bpf attach types and a function to map bpf_attach_type values to the new enum. Inspired by netns_bpf_attach_type. Then, migrate cgroup_bpf to use cgroup_bpf_attach_type wherever possible. Functionality is unchanged as attach_type_to_prog_type switches in bpf/syscall.c were preventing non-cgroup programs from making use of the invalid cgroup_bpf array slots. As a result struct cgroup_bpf uses 504 fewer bytes relative to when its arrays were sized using MAX_BPF_ATTACH_TYPE. bpf_cgroup_storage is notably not migrated as struct bpf_cgroup_storage_key is part of uapi and contains a bpf_attach_type member which is not meant to be opaque. Similarly, bpf_cgroup_link continues to report its bpf_attach_type member to userspace via fdinfo and bpf_link_info. To ease disambiguation, bpf_attach_type variables are renamed from 'type' to 'atype' when changed to cgroup_bpf_attach_type. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210819092420.1984861-2-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4f7892edb2b..191f0b286ee3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,7 +84,7 @@ struct bpf_lpm_trie_key { struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; union bpf_iter_link_info { -- cgit v1.2.3 From 029ee6b14356b94120bedb852dcdaefc0a282cf1 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Fri, 20 Aug 2021 15:35:17 +0800 Subject: ethtool: add two coalesce attributes for CQE mode Currently, there are many drivers who support CQE mode configuration, some configure it as a fixed when initialized, some provide an interface to change it by ethtool private flags. In order to make it more generic, add two new 'ETHTOOL_A_COALESCE_USE_CQE_TX' and 'ETHTOOL_A_COALESCE_USE_CQE_RX' coalesce attributes, then these parameters can be accessed by ethtool netlink coalesce uAPI. Also add an new structure kernel_ethtool_coalesce, then the new parameter can be added into this struct. Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b3b93710eff7..5545f1ca9237 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -377,6 +377,8 @@ enum { ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, -- cgit v1.2.3 From 54404d357284d1405d98c424951357d970f41168 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 16 Aug 2021 09:28:46 -0700 Subject: scsi: fc: Add EDC ELS definition Add Exchange Diagnostic Capabilities (EDC) ELS definition and the following capability descriptors: - Link Fault Capability Descriptor - Congestion Signaling Capability Descriptor Definitions taken from FC-LS-5 r5.01 Link: https://lore.kernel.org/r/20210816162901.121235-2-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- include/uapi/scsi/fc/fc_els.h | 106 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/scsi/fc/fc_els.h b/include/uapi/scsi/fc/fc_els.h index 91d4be987220..c9812c5c2fc4 100644 --- a/include/uapi/scsi/fc/fc_els.h +++ b/include/uapi/scsi/fc/fc_els.h @@ -41,6 +41,7 @@ enum fc_els_cmd { ELS_REC = 0x13, /* read exchange concise */ ELS_SRR = 0x14, /* sequence retransmission request */ ELS_FPIN = 0x16, /* Fabric Performance Impact Notification */ + ELS_EDC = 0x17, /* Exchange Diagnostic Capabilities */ ELS_RDP = 0x18, /* Read Diagnostic Parameters */ ELS_RDF = 0x19, /* Register Diagnostic Functions */ ELS_PRLI = 0x20, /* process login */ @@ -111,6 +112,7 @@ enum fc_els_cmd { [ELS_REC] = "REC", \ [ELS_SRR] = "SRR", \ [ELS_FPIN] = "FPIN", \ + [ELS_EDC] = "EDC", \ [ELS_RDP] = "RDP", \ [ELS_RDF] = "RDF", \ [ELS_PRLI] = "PRLI", \ @@ -218,6 +220,10 @@ enum fc_els_rjt_explan { enum fc_ls_tlv_dtag { ELS_DTAG_LS_REQ_INFO = 0x00000001, /* Link Service Request Information Descriptor */ + ELS_DTAG_LNK_FAULT_CAP = 0x0001000D, + /* Link Fault Capability Descriptor */ + ELS_DTAG_CG_SIGNAL_CAP = 0x0001000F, + /* Congestion Signaling Capability Descriptor */ ELS_DTAG_LNK_INTEGRITY = 0x00020001, /* Link Integrity Notification Descriptor */ ELS_DTAG_DELIVERY = 0x00020002, @@ -236,6 +242,8 @@ enum fc_ls_tlv_dtag { */ #define FC_LS_TLV_DTAG_INIT { \ { ELS_DTAG_LS_REQ_INFO, "Link Service Request Information" }, \ + { ELS_DTAG_LNK_FAULT_CAP, "Link Fault Capability" }, \ + { ELS_DTAG_CG_SIGNAL_CAP, "Congestion Signaling Capability" }, \ { ELS_DTAG_LNK_INTEGRITY, "Link Integrity Notification" }, \ { ELS_DTAG_DELIVERY, "Delivery Notification Present" }, \ { ELS_DTAG_PEER_CONGEST, "Peer Congestion Notification" }, \ @@ -1144,4 +1152,102 @@ struct fc_els_rdf_resp { }; +/* + * Diagnostic Capability Descriptors for EDC ELS + */ + +/* + * Diagnostic: Link Fault Capability Descriptor + */ +struct fc_diag_lnkflt_desc { + __be32 desc_tag; /* Descriptor Tag (0x0001000D) */ + __be32 desc_len; /* Length of Descriptor (in bytes). + * Size of descriptor excluding + * desc_tag and desc_len fields. + * 12 bytes + */ + __be32 degrade_activate_threshold; + __be32 degrade_deactivate_threshold; + __be32 fec_degrade_interval; +}; + +enum fc_edc_cg_signal_cap_types { + /* Note: Capability: bits 31:4 Rsvd; bits 3:0 are capabilities */ + EDC_CG_SIG_NOTSUPPORTED = 0x00, /* neither supported */ + EDC_CG_SIG_WARN_ONLY = 0x01, + EDC_CG_SIG_WARN_ALARM = 0x02, /* both supported */ +}; + +/* + * Initializer useful for decoding table. + * Please keep this in sync with the above definitions. + */ +#define FC_EDC_CG_SIGNAL_CAP_TYPES_INIT { \ + { EDC_CG_SIG_NOTSUPPORTED, "Signaling Not Supported" }, \ + { EDC_CG_SIG_WARN_ONLY, "Warning Signal" }, \ + { EDC_CG_SIG_WARN_ALARM, "Warning and Alarm Signals" }, \ +} + +enum fc_diag_cg_sig_freq_types { + EDC_CG_SIGFREQ_CNT_MIN = 1, /* Min Frequency Count */ + EDC_CG_SIGFREQ_CNT_MAX = 999, /* Max Frequency Count */ + + EDC_CG_SIGFREQ_SEC = 0x1, /* Units: seconds */ + EDC_CG_SIGFREQ_MSEC = 0x2, /* Units: milliseconds */ +}; + +struct fc_diag_cg_sig_freq { + __be16 count; /* Time between signals + * note: upper 6 bits rsvd + */ + __be16 units; /* Time unit for count + * note: upper 12 bits rsvd + */ +}; + +/* + * Diagnostic: Congestion Signaling Capability Descriptor + */ +struct fc_diag_cg_sig_desc { + __be32 desc_tag; /* Descriptor Tag (0x0001000F) */ + __be32 desc_len; /* Length of Descriptor (in bytes). + * Size of descriptor excluding + * desc_tag and desc_len fields. + * 16 bytes + */ + __be32 xmt_signal_capability; + struct fc_diag_cg_sig_freq xmt_signal_frequency; + __be32 rcv_signal_capability; + struct fc_diag_cg_sig_freq rcv_signal_frequency; +}; + +/* + * ELS_EDC - Exchange Diagnostic Capabilities + */ +struct fc_els_edc { + __u8 edc_cmd; /* command (0x17) */ + __u8 edc_zero[3]; /* specified as zero - part of cmd */ + __be32 desc_len; /* Length of Descriptor List (in bytes). + * Size of ELS excluding edc_cmd, + * edc_zero and desc_len fields. + */ + struct fc_tlv_desc desc[0]; + /* Diagnostic Descriptor list */ +}; + +/* + * ELS EDC LS_ACC Response. + */ +struct fc_els_edc_resp { + struct fc_els_ls_acc acc_hdr; + __be32 desc_list_len; /* Length of response (in + * bytes). Excludes acc_hdr + * and desc_list_len fields. + */ + struct fc_els_lsri_desc lsri; + struct fc_tlv_desc desc[0]; + /* Supported Diagnostic Descriptor list */ +}; + + #endif /* _FC_ELS_H_ */ -- cgit v1.2.3 From b9445598d8c60a1379887b957024b71343965f74 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 25 Aug 2021 12:25:45 +0100 Subject: io_uring: openat directly into fixed fd table Instead of opening a file into a process's file table as usual and then registering the fd within io_uring, some users may want to skip the first step and place it directly into io_uring's fixed file table. This patch adds such a capability for IORING_OP_OPENAT and IORING_OP_OPENAT2. The behaviour is controlled by setting sqe->file_index, where 0 implies the old behaviour using normal file tables. If non-zero value is specified, then it will behave as described and place the file into a fixed file slot sqe->file_index - 1. A file table should be already created, the slot should be valid and empty, otherwise the operation will fail. Keep the error codes consistent with IORING_OP_FILES_UPDATE, ENXIO and EINVAL on inappropriate fixed tables, and return EBADF on collision with already registered file. Note: IOSQE_FIXED_FILE can't be used to switch between modes, because accept takes a file, and it already uses the flag with a different meaning. Suggested-by: Josh Triplett Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/e9b33d1163286f51ea707f87d95bd596dada1e65.1629888991.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 79126d5cd289..45a4f2373694 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -55,7 +55,10 @@ struct io_uring_sqe { } __attribute__((packed)); /* personality to use, if used */ __u16 personality; - __s32 splice_fd_in; + union { + __s32 splice_fd_in; + __u32 file_index; + }; __u64 __pad2[2]; }; -- cgit v1.2.3 From dd6e10fbd9fb86a571d925602c8a24bb4d09a2a7 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 23 Aug 2021 19:43:49 -0700 Subject: bpf: Add bpf_task_pt_regs() helper The motivation behind this helper is to access userspace pt_regs in a kprobe handler. uprobe's ctx is the userspace pt_regs. kprobe's ctx is the kernelspace pt_regs. bpf_task_pt_regs() allows accessing userspace pt_regs in a kprobe handler. The final case (kernelspace pt_regs in uprobe) is pretty rare (usermode helper) so I think that can be solved later if necessary. More concretely, this helper is useful in doing BPF-based DWARF stack unwinding. Currently the kernel can only do framepointer based stack unwinds for userspace code. This is because the DWARF state machines are too fragile to be computed in kernelspace [0]. The idea behind DWARF-based stack unwinds w/ BPF is to copy a chunk of the userspace stack (while in prog context) and send it up to userspace for unwinding (probably with libunwind) [1]. This would effectively enable profiling applications with -fomit-frame-pointer using kprobes and uprobes. [0]: https://lkml.org/lkml/2012/2/10/356 [1]: https://github.com/danobi/bpf-dwarf-walk Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/e2718ced2d51ef4268590ab8562962438ab82815.1629772842.git.dxu@dxuuu.xyz --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 191f0b286ee3..791f31dd0abe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4871,6 +4871,12 @@ union bpf_attr { * Return * Value specified by user at BPF link creation/attachment time * or 0, if it was not specified. + * + * long bpf_task_pt_regs(struct task_struct *task) + * Description + * Get the struct pt_regs associated with **task**. + * Return + * A pointer to struct pt_regs. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5048,6 +5054,7 @@ union bpf_attr { FN(timer_cancel), \ FN(get_func_ip), \ FN(get_attach_cookie), \ + FN(task_pt_regs), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 6a241d2923c2c0f6893c78c79421ceb3935691fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Aug 2021 17:53:29 +0200 Subject: um: virt-pci: fix uapi documentation The identifier names in the documentation here didn't match the real ones, and the reserved was missing. Fix that. Reported-by: Bjorn Helgaas Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- include/uapi/linux/virtio_pcidev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h index 89daa88bcfef..668b07ce515b 100644 --- a/include/uapi/linux/virtio_pcidev.h +++ b/include/uapi/linux/virtio_pcidev.h @@ -9,13 +9,14 @@ /** * enum virtio_pcidev_ops - virtual PCI device operations + * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; * the @data field should be filled in by the device (in little endian). * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; * the @data field contains the data to write (in little endian). - * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable; + * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable; * the @data field should be filled in by the device (in little endian). - * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable; + * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable; * the @data field contains the data to write (in little endian). * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) -- cgit v1.2.3 From 49b99da2c9ce13ffcd93fe3a0f5670791c1d76f7 Mon Sep 17 00:00:00 2001 From: Rocco Yue Date: Fri, 27 Aug 2021 23:04:12 +0800 Subject: ipv6: add IFLA_INET6_RA_MTU to expose mtu value The kernel provides a "/proc/sys/net/ipv6/conf//mtu" file, which can temporarily record the mtu value of the last received RA message when the RA mtu value is lower than the interface mtu, but this proc has following limitations: (1) when the interface mtu (/sys/class/net//mtu) is updeated, mtu6 (/proc/sys/net/ipv6/conf//mtu) will be updated to the value of interface mtu; (2) mtu6 (/proc/sys/net/ipv6/conf//mtu) only affect ipv6 connection, and not affect ipv4. Therefore, when the mtu option is carried in the RA message, there will be a problem that the user sometimes cannot obtain RA mtu value correctly by reading mtu6. After this patch set, if a RA message carries the mtu option, you can send a netlink msg which nlmsg_type is RTM_GETLINK, and then by parsing the attribute of IFLA_INET6_RA_MTU to get the mtu value carried in the RA message received on the inet6 device. In addition, you can also get a link notification when ra_mtu is updated so it doesn't have to poll. In this way, if the MTU values that the device receives from the network in the PCO IPv4 and the RA IPv6 procedures are different, the user can obtain the correct ipv6 ra_mtu value and compare the value of ra_mtu and ipv4 mtu, then the device can use the lower MTU value for both IPv4 and IPv6. Signed-off-by: Rocco Yue Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210827150412.9267-1-rocco.yue@mediatek.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8aad65b69054..eebd3894fe89 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -417,6 +417,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; -- cgit v1.2.3 From 486e19795f2ee11f0334e2e3fcf8951d4981ff88 Mon Sep 17 00:00:00 2001 From: Yuri Nudelman Date: Thu, 3 Jun 2021 17:51:58 +0300 Subject: habanalabs: allow fail on inability to respect hint A new user flag is required to make memory map hint mandatory, in contrast to the current situation where it is best effort. This is due to the requirement to map certain data to specific pre-determined device virtual address ranges. Signed-off-by: Yuri Nudelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a47a731e4527..18765eb75b65 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -813,6 +813,7 @@ union hl_wait_cs_args { #define HL_MEM_CONTIGUOUS 0x1 #define HL_MEM_SHARED 0x2 #define HL_MEM_USERPTR 0x4 +#define HL_MEM_FORCE_HINT 0x8 struct hl_mem_in { union { -- cgit v1.2.3 From 215f0c1775d5506c8a833b5c85a77b5fb65bf26b Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Jun 2021 22:18:41 +0300 Subject: habanalabs: add wait-for-multi-CS uAPI When user sends multiple CSs, waiting for each CS is not efficient as it involves many user-kernel context switches. In order to address this issue we add support to "wait on multiple CSs" using a new uAPI which can wait on maximum of 32 CSs. The new uAPI is defined using a new flag - WAIT_FOR_MULTI_CS - in the wait_for_cs IOCTL. The input parameters for this uAPI will be: @seq: user pointer to an array of up to 32 CS's sequence numbers. @seq_array_len: length of sequence array. @timeout_us: timeout for waiting for any CS. The output paramateres for this API will be: @status: multi CS ioctl completion status (dedicated status was added as well). @flags: bitmap of output flags of the CS. @cs_completion_map: bitmap for multi CS, if CS sequence that was placed in index N in input seq array has completed- the N-th bit in cs_completion_map will be 1, otherwise it will be 0. @timestamp_nsec: timestamp of the first completed CS Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 18765eb75b65..49c737c4a2f6 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -735,11 +735,18 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 + +#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 struct hl_wait_cs_in { union { struct { - /* Command submission sequence number */ + /* + * In case of wait_cs holds the CS sequence number. + * In case of wait for multi CS hold a user pointer to + * an array of CS sequence numbers + */ __u64 seq; /* Absolute timeout to wait for command submission * in microseconds @@ -767,12 +774,17 @@ struct hl_wait_cs_in { /* Context ID - Currently not in use */ __u32 ctx_id; + /* HL_WAIT_CS_FLAGS_* * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order * not to specify an interrupt id ,set mask to all 1s. */ __u32 flags; + + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -789,8 +801,15 @@ struct hl_wait_cs_out { __u32 status; /* HL_WAIT_CS_STATUS_FLAG* */ __u32 flags; - /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */ + /* + * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set + * for wait_cs: timestamp of CS completion + * for wait_multi_cs: timestamp of FIRST CS completion + */ __s64 timestamp_nsec; + /* multi CS completion bitmap */ + __u32 cs_completion_map; + __u32 pad; }; union hl_wait_cs_args { -- cgit v1.2.3 From dadf17abb7245d9556591d8cc78bf57462e3b20a Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Mon, 24 May 2021 18:09:22 +0300 Subject: habanalabs: add support for encapsulated signals reservation The signaling from within encapsulated OP capability is merged into the existing stream architecture, such that one can trigger multiple signaling from an encapsulated op, according to the time the event was done in the graph execution and avoid the need to wait for the whole encapsulated OP execution to be complete before the stream can signal. This commit implements only the reserve/unreserve part. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 110 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 97 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 49c737c4a2f6..eca86c545916 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -628,12 +628,21 @@ struct hl_cs_chunk { __u64 cb_handle; /* Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * HL_CS_FLAGS_COLLECTIVE_WAIT is set * This holds address of array of u64 values that contain - * signal CS sequence numbers. The wait described by this job - * will listen on all those signals (wait event per signal) + * signal CS sequence numbers. The wait described by + * this job will listen on all those signals + * (wait event per signal) */ __u64 signal_seq_arr; + + /* + * Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * along with HL_CS_FLAGS_ENCAP_SIGNALS. + * This is the CS sequence which has the encapsulated signals. + */ + __u64 encaps_signal_seq; }; /* Index of queue to put the CB on */ @@ -651,6 +660,17 @@ struct hl_cs_chunk { * Number of entries in signal_seq_arr */ __u32 num_signal_seq_arr; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set along + * with HL_CS_FLAGS_ENCAP_SIGNALS + * This set the signals range that the user want to wait for + * out of the whole reserved signals range. + * e.g if the signals range is 20, and user don't want + * to wait for signal 8, so he set this offset to 7, then + * he call the API again with 9 and so on till 20. + */ + __u32 encaps_signal_offset; }; /* HL_CS_CHUNK_FLAGS_* */ @@ -678,6 +698,28 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 #define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 +/* + * The encapsulated signals CS is merged into the existing CS ioctls. + * In order to use this feature need to follow the below procedure: + * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY + * the output of this API will be the SOB offset from CFG_BASE. + * this address will be used to patch CB cmds to do the signaling for this + * SOB by incrementing it's value. + * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY + * CS type, note that this might fail if out-of-sync happened to the SOB + * value, in case other signaling request to the same SOB occurred between + * reserve-unreserve calls. + * 2. Use the staged CS to do the encapsulated signaling jobs. + * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset + * field. This offset allows app to wait on part of the reserved signals. + * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag + * to wait for the encapsulated signals. + */ +#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 +#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 +#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 @@ -690,10 +732,35 @@ struct hl_cs_in { /* holds address of array of hl_cs_chunk for execution phase */ __u64 chunks_execute; - /* Sequence number of a staged submission CS - * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set - */ - __u64 seq; + union { + /* + * Sequence number of a staged submission CS + * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. + */ + __u64 seq; + + /* + * Encapsulated signals handle id + * Valid for two flows: + * 1. CS with encapsulated signals: + * when HL_CS_FLAGS_STAGED_SUBMISSION and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * and HL_CS_FLAGS_ENCAP_SIGNALS are set. + * 2. unreserve signals: + * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. + */ + __u32 encaps_sig_handle_id; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* Encapsulated signals number */ + __u32 encaps_signals_count; + + /* Encapsulated signals queue index (stream) */ + __u32 encaps_signals_q_idx; + }; + }; /* Number of chunks in restore phase array. Maximum number is * HL_MAX_JOBS_PER_CS @@ -718,14 +785,31 @@ struct hl_cs_in { }; struct hl_cs_out { + union { + /* + * seq holds the sequence number of the CS to pass to wait + * ioctl. All values are valid except for 0 and ULLONG_MAX + */ + __u64 seq; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* This is the resereved signal handle id */ + __u32 handle_id; + + /* This is the signals count */ + __u32 count; + }; + }; + + /* HL_CS_STATUS */ + __u32 status; + /* - * seq holds the sequence number of the CS to pass to wait ioctl. All - * values are valid except for 0 and ULLONG_MAX + * SOB base address offset + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ - __u64 seq; - /* HL_CS_STATUS_* */ - __u32 status; - __u32 pad; + __u32 sob_base_addr_offset; }; union hl_cs_args { -- cgit v1.2.3 From d8071323c5632bdf0a8ef9b9e5662fac43649f9d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Aug 2021 09:34:06 -0700 Subject: dmaengine: idxd: fix setting up priv mode for dwq DSA spec says WQ priv bit is 0 if the Privileged Mode Enable field of the PCI Express PASID capability is 0 and pasid is enabled. Make sure that the WQCFG priv field is set correctly according to usage type. Reject config if setting up kernel WQ type and no support. Also add the correct priv setup for a descriptor. Fixes: 484f910e93b4 ("dmaengine: idxd: fix wq config registers offset programming") Cc: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162939084657.903168.14160019185148244596.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index ca24c25252fb..c750eac09fc9 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -27,6 +27,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, + IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- cgit v1.2.3 From 2e480058ddc21ec53a10e8b41623e245e908bdbc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Aug 2021 11:33:19 -0600 Subject: io-wq: provide a way to limit max number of workers io-wq divides work into two categories: 1) Work that completes in a bounded time, like reading from a regular file or a block device. This type of work is limited based on the size of the SQ ring. 2) Work that may never complete, we call this unbounded work. The amount of workers here is just limited by RLIMIT_NPROC. For various uses cases, it's handy to have the kernel limit the maximum amount of pending workers for both categories. Provide a way to do with with a new IORING_REGISTER_IOWQ_MAX_WORKERS operation. IORING_REGISTER_IOWQ_MAX_WORKERS takes an array of two integers and sets the max worker count to what is being passed in for each category. The old values are returned into that same array. If 0 is being passed in for either category, it simply returns the current value. The value is capped at RLIMIT_NPROC. This actually isn't that important as it's more of a hint, if we're exceeding the value then our attempt to fork a new worker will fail. This happens naturally already if more than one node is in the system, as these values are per-node internally for io-wq. Reported-by: Johannes Lundberg Link: https://github.com/axboe/liburing/issues/420 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 45a4f2373694..64fe809c4e36 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -309,6 +309,9 @@ enum { IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, + /* set/get max number of workers */ + IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 50c1df2b56e0f581b1dbf334dbf807d6fb8f77b2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Aug 2021 17:11:06 -0600 Subject: io_uring: support CLOCK_BOOTTIME/REALTIME for timeouts Certain use cases want to use CLOCK_BOOTTIME or CLOCK_REALTIME rather than CLOCK_MONOTONIC, instead of the default CLOCK_MONOTONIC. Add an IORING_TIMEOUT_BOOTTIME and IORING_TIMEOUT_REALTIME flag that allows timeouts and linked timeouts to use the selected clock source. Only one clock source may be selected, and we -EINVAL the request if more than one is given. If neither BOOTIME nor REALTIME are selected, the previous default of MONOTONIC is used. Link: https://github.com/axboe/liburing/issues/369 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 64fe809c4e36..b6d28d927a3f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -151,6 +151,9 @@ enum { */ #define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_UPDATE (1U << 1) +#define IORING_TIMEOUT_BOOTTIME (1U << 2) +#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) /* * sqe->splice_flags -- cgit v1.2.3 From f1042b6ccb887f07301f6b096b3d0cfcf9189323 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 28 Aug 2021 19:54:39 -0600 Subject: io_uring: allow updating linked timeouts We allow updating normal timeouts, add support for adjusting timings of linked timeouts as well. Reported-by: Victor Stewart Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b6d28d927a3f..3caec9199658 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -149,12 +149,13 @@ enum { /* * sqe->timeout_flags */ -#define IORING_TIMEOUT_ABS (1U << 0) -#define IORING_TIMEOUT_UPDATE (1U << 1) -#define IORING_TIMEOUT_BOOTTIME (1U << 2) -#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_TIMEOUT_ABS (1U << 0) +#define IORING_TIMEOUT_UPDATE (1U << 1) +#define IORING_TIMEOUT_BOOTTIME (1U << 2) +#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) - +#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* * sqe->splice_flags * extends splice(2) flags -- cgit v1.2.3 From d7e7747ac5c2496c98291944c6066adaa9f3b975 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Aug 2021 15:54:22 +0200 Subject: netfilter: refuse insertion if chain has grown too large Also add a stat counter for this that gets exported both via old /proc interface and ctnetlink. Assuming the old default size of 16536 buckets and max hash occupancy of 64k, this results in 128k insertions (origin+reply), so ~8 entries per chain on average. The revised settings in this series will result in about two entries per bucket on average. This allows a hard-limit ceiling of 64. This is not tunable at the moment, but its possible to either increase nf_conntrack_buckets or decrease nf_conntrack_max to reduce average lengths. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..5ade231f497b 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -257,6 +257,7 @@ enum ctattr_stats_cpu { CTA_STATS_ERROR, CTA_STATS_SEARCH_RESTART, CTA_STATS_CLASH_RESOLVE, + CTA_STATS_CHAIN_TOOLONG, __CTA_STATS_MAX, }; #define CTA_STATS_MAX (__CTA_STATS_MAX - 1) -- cgit v1.2.3 From 17395d7742baa4737e9d3b4672cc3d10e5970998 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 Aug 2021 10:59:25 +0530 Subject: gpio: virtio: Fix sparse warnings Fix warnings reported by sparse, related to type mismatch between u16 and __le16. Reported-by: kernel test robot Fixes: 3a29355a22c0 ("gpio: Add virtio-gpio driver") Signed-off-by: Viresh Kumar Acked-by: Michael S. Tsirkin Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/virtio_gpio.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h index 844574acf095..0445f905d8cc 100644 --- a/include/uapi/linux/virtio_gpio.h +++ b/include/uapi/linux/virtio_gpio.h @@ -22,16 +22,16 @@ #define VIRTIO_GPIO_DIRECTION_IN 0x02 struct virtio_gpio_config { - __u16 ngpio; + __le16 ngpio; __u8 padding[2]; - __u32 gpio_names_size; + __le32 gpio_names_size; } __packed; /* Virtio GPIO Request / Response */ struct virtio_gpio_request { - __u16 type; - __u16 gpio; - __u32 value; + __le16 type; + __le16 gpio; + __le32 value; }; struct virtio_gpio_response { -- cgit v1.2.3 From 5dc9ffaff1420676827c4054bc789a7710f2a272 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 15 Jul 2021 10:48:43 +0300 Subject: habanalabs: expose server type in INFO IOCTL Add the server type property to the hl_info_hw_ip_info structure that is exposed to the user via the INFO IOCTL. This is needed by the userspace s/w stack to know the connections map of the internal links that connect the ASIC among themselves inside the server. The F/W will tell us, as part of the NIC information, the server type that the GAUDI is located in. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 48 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index eca86c545916..6686b73a0834 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -279,6 +279,14 @@ enum hl_device_status { HL_DEVICE_STATUS_NEEDS_RESET }; +enum hl_server_type { + HL_SERVER_TYPE_UNKNOWN = 0, + HL_SERVER_GAUDI_HLS1 = 1, + HL_SERVER_GAUDI_HLS1H = 2, + HL_SERVER_GAUDI_TYPE1 = 3, + HL_SERVER_GAUDI_TYPE2 = 4 +}; + /* Opcode for management ioctl * * HW_IP_INFO - Receive information about different IP blocks in the @@ -337,17 +345,49 @@ enum hl_device_status { #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 +/** + * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC + * @sram_base_address: The first SRAM physical base address that is free to be + * used by the user. + * @dram_base_address: The first DRAM virtual or physical base address that is + * free to be used by the user. + * @dram_size: The DRAM size that is available to the user. + * @sram_size: The SRAM size that is available to the user. + * @num_of_events: The number of events that can be received from the f/w. This + * is needed so the user can what is the size of the h/w events + * array he needs to pass to the kernel when he wants to fetch + * the event counters. + * @device_id: PCI device ID of the ASIC. + * @module_id: Module ID of the ASIC for mezzanine cards in servers + * (From OCP spec). + * @first_available_interrupt_id: The first available interrupt ID for the user + * to be used when it works with user interrupts. + * @server_type: Server type that the Gaudi ASIC is currently installed in. + * The value is according to enum hl_server_type + * @cpld_version: CPLD version on the board. + * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler + * in some ASICs. + * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant + * for Goya/Gaudi only. + * @dram_enabled: Whether the DRAM is enabled. + * @cpucp_version: The CPUCP f/w version. + * @card_name: The card name as passed by the f/w. + * @dram_page_size: The DRAM physical page size. + */ struct hl_info_hw_ip_info { __u64 sram_base_address; __u64 dram_base_address; __u64 dram_size; __u32 sram_size; __u32 num_of_events; - __u32 device_id; /* PCI Device ID */ - __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */ + __u32 device_id; + __u32 module_id; __u32 reserved; __u16 first_available_interrupt_id; - __u16 reserved2; + __u16 server_type; __u32 cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; @@ -358,7 +398,7 @@ struct hl_info_hw_ip_info { __u8 pad[2]; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 reserved3; + __u64 reserved2; __u64 dram_page_size; }; -- cgit v1.2.3 From 71731090ab17a208a58020e4b342fdfee280458a Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Mon, 16 Aug 2021 13:27:12 +0300 Subject: habanalabs: add "in device creation" status On init, the disabled state is cleared right before hw_init and that causes the device to report on "Operational" state before the device initialization is finished. Although the char device is not yet exposed to the user at this stage, the sysfs entries are exposed. This can cause errors in monitoring applications that use the sysfs entries. In order to avoid this, a new state "in device creation" is introduced to ne reported when the device is not disabled but is still in init flow. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6686b73a0834..7cc2a0f3f2f5 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -276,7 +276,9 @@ enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, HL_DEVICE_STATUS_MALFUNCTION, - HL_DEVICE_STATUS_NEEDS_RESET + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION }; enum hl_server_type { -- cgit v1.2.3 From b27abaccf8e8b012f126da0c2a1ab32723ec8b9f Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Sep 2021 15:00:06 -0700 Subject: mm/mempolicy: add MPOL_PREFERRED_MANY for multiple preferred nodes Patch series "Introduce multi-preference mempolicy", v7. This patch series introduces the concept of the MPOL_PREFERRED_MANY mempolicy. This mempolicy mode can be used with either the set_mempolicy(2) or mbind(2) interfaces. Like the MPOL_PREFERRED interface, it allows an application to set a preference for nodes which will fulfil memory allocation requests. Unlike the MPOL_PREFERRED mode, it takes a set of nodes. Like the MPOL_BIND interface, it works over a set of nodes. Unlike MPOL_BIND, it will not cause a SIGSEGV or invoke the OOM killer if those preferred nodes are not available. Along with these patches are patches for libnuma, numactl, numademo, and memhog. They still need some polish, but can be found here: https://gitlab.com/bwidawsk/numactl/-/tree/prefer-many It allows new usage: `numactl -P 0,3,4` The goal of the new mode is to enable some use-cases when using tiered memory usage models which I've lovingly named. 1a. The Hare - The interconnect is fast enough to meet bandwidth and latency requirements allowing preference to be given to all nodes with "fast" memory. 1b. The Indiscriminate Hare - An application knows it wants fast memory (or perhaps slow memory), but doesn't care which node it runs on. The application can prefer a set of nodes and then xpu bind to the local node (cpu, accelerator, etc). This reverses the nodes are chosen today where the kernel attempts to use local memory to the CPU whenever possible. This will attempt to use the local accelerator to the memory. 2. The Tortoise - The administrator (or the application itself) is aware it only needs slow memory, and so can prefer that. Much of this is almost achievable with the bind interface, but the bind interface suffers from an inability to fallback to another set of nodes if binding fails to all nodes in the nodemask. Like MPOL_BIND a nodemask is given. Inherently this removes ordering from the preference. > /* Set first two nodes as preferred in an 8 node system. */ > const unsigned long nodes = 0x3 > set_mempolicy(MPOL_PREFER_MANY, &nodes, 8); > /* Mimic interleave policy, but have fallback *. > const unsigned long nodes = 0xaa > set_mempolicy(MPOL_PREFER_MANY, &nodes, 8); Some internal discussion took place around the interface. There are two alternatives which we have discussed, plus one I stuck in: 1. Ordered list of nodes. Currently it's believed that the added complexity is nod needed for expected usecases. 2. A flag for bind to allow falling back to other nodes. This confuses the notion of binding and is less flexible than the current solution. 3. Create flags or new modes that helps with some ordering. This offers both a friendlier API as well as a solution for more customized usage. It's unknown if it's worth the complexity to support this. Here is sample code for how this might work: > // Prefer specific nodes for some something wacky > set_mempolicy(MPOL_PREFER_MANY, 0x17c, 1024); > > // Default > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_SOCKET, NULL, 0); > // which is the same as > set_mempolicy(MPOL_DEFAULT, NULL, 0); > > // The Hare > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE, NULL, 0); > > // The Tortoise > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE_REV, NULL, 0); > > // Prefer the fast memory of the first two sockets > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE, -1, 2); > This patch (of 5): The NUMA APIs currently allow passing in a "preferred node" as a single bit set in a nodemask. If more than one bit it set, bits after the first are ignored. This single node is generally OK for location-based NUMA where memory being allocated will eventually be operated on by a single CPU. However, in systems with multiple memory types, folks want to target a *type* of memory instead of a location. For instance, someone might want some high-bandwidth memory but do not care about the CPU next to which it is allocated. Or, they want a cheap, high capacity allocation and want to target all NUMA nodes which have persistent memory in volatile mode. In both of these cases, the application wants to target a *set* of nodes, but does not want strict MPOL_BIND behavior as that could lead to OOM killer or SIGSEGV. So add MPOL_PREFERRED_MANY policy to support the multiple preferred nodes requirement. This is not a pie-in-the-sky dream for an API. This was a response to a specific ask of more than one group at Intel. Specifically: 1. There are existing libraries that target memory types such as https://github.com/memkind/memkind. These are known to suffer from SIGSEGV's when memory is low on targeted memory "kinds" that span more than one node. The MCDRAM on a Xeon Phi in "Cluster on Die" mode is an example of this. 2. Volatile-use persistent memory users want to have a memory policy which is targeted at either "cheap and slow" (PMEM) or "expensive and fast" (DRAM). However, they do not want to experience allocation failures when the targeted type is unavailable. 3. Allocate-then-run. Generally, we let the process scheduler decide on which physical CPU to run a task. That location provides a default allocation policy, and memory availability is not generally considered when placing tasks. For situations where memory is valuable and constrained, some users want to allocate memory first, *then* allocate close compute resources to the allocation. This is the reverse of the normal (CPU) model. Accelerators such as GPUs that operate on core-mm-managed memory are interested in this model. A check is added in sanitize_mpol_flags() to not permit 'prefer_many' policy to be used for now, and will be removed in later patch after all implementations for 'prefer_many' are ready, as suggested by Michal Hocko. [mhocko@kernel.org: suggest to refine policy_node/policy_nodemask handling] Link: https://lkml.kernel.org/r/1627970362-61305-1-git-send-email-feng.tang@intel.com Link: https://lore.kernel.org/r/20200630212517.308045-4-ben.widawsky@intel.com Link: https://lkml.kernel.org/r/1627970362-61305-2-git-send-email-feng.tang@intel.com Co-developed-by: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Dave Hansen Signed-off-by: Feng Tang Cc: Michal Hocko Acked-by: Michal Hocko Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Andi Kleen Cc: Dan Williams Cc: Huang Ying b Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/mempolicy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 19a00bc7fe86..046d0ccba4cd 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -22,6 +22,7 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, + MPOL_PREFERRED_MANY, MPOL_MAX, /* always last member of enum */ }; -- cgit v1.2.3 From dce49103962840dd61423d7627748d6c558d58c5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 2 Sep 2021 15:00:33 -0700 Subject: mm: wire up syscall process_mrelease Split off from prev patch in the series that implements the syscall. Link: https://lkml.kernel.org/r/20210809185259.405936-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Geert Uytterhoeven Cc: Andy Lutomirski Cc: Christian Brauner Cc: Christoph Hellwig Cc: David Hildenbrand Cc: David Rientjes Cc: Florian Weimer Cc: Jan Engelhardt Cc: Jann Horn Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tim Murray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index a9d6fcd95f42..14c8fe863c6d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #define __NR_memfd_secret 447 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #endif +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) #undef __NR_syscalls -#define __NR_syscalls 448 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From c7c5e6ff533fe1f9afef7d2fa46678987a1335a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Sep 2021 15:03:43 -0700 Subject: fq_codel: reject silly quantum parameters syzbot found that forcing a big quantum attribute would crash hosts fast, essentially using this: tc qd replace dev eth0 root fq_codel quantum 4294967295 This is because fq_codel_dequeue() would have to loop ~2^31 times in : if (flow->deficit <= 0) { flow->deficit += q->quantum; list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } SFQ max quantum is 2^19 (half a megabyte) Lets adopt a max quantum of one megabyte for FQ_CODEL. Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 79a699f106b1..ec88590b3198 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -827,6 +827,8 @@ struct tc_codel_xstats { /* FQ_CODEL */ +#define FQ_CODEL_QUANTUM_MAX (1 << 20) + enum { TCA_FQ_CODEL_UNSPEC, TCA_FQ_CODEL_TARGET, -- cgit v1.2.3 From 9d1b3afd73047d4dd30e3636412c9f9b5def2b14 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 3 Sep 2021 19:20:56 -0700 Subject: cxl/uapi: Fix defined but not used warnings Fix unused-const-variable warnings emitted by gcc when cxlmem.h is used by pretty much all files except pci.c Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/163072205652.2250120.16833548560832424468.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index f6e8a005b113..8d206f27bb6d 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -50,7 +50,7 @@ enum { CXL_CMDS }; #define ___C(a, b) { b } static const struct { const char *name; -} cxl_command_names[] = { CXL_CMDS }; +} cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS }; /* * Here's how this actually breaks out: -- cgit v1.2.3 From 59ab844eed9c6b01d32dcb27b57accc23771b324 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Sep 2021 15:18:25 -0700 Subject: compat: remove some compat entry points These are all handled correctly when calling the native system call entry point, so remove the special cases. Link: https://lkml.kernel.org/r/20210727144859.4150043-6-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Eric Biederman Cc: Feng Tang Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/unistd.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 14c8fe863c6d..1c5fb86d455a 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise) #define __NR_remap_file_pages 234 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) #define __NR_mbind 235 -__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 236 -__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 237 -__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_migrate_pages 238 -__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_move_pages 239 -__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #endif #define __NR_rt_tgsigqueueinfo 240 -- cgit v1.2.3